initial commit
[jalview.git] / forester / archive / RIO / others / puzzle_dqo / src / puzzle2.c
1 /*
2  * puzzle2.c
3  *
4  *
5  * Part of TREE-PUZZLE 5.0 (June 2000)
6  *
7  * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer,
8  *                  M. Vingron, and Arndt von Haeseler
9  * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler
10  *
11  * All parts of the source except where indicated are distributed under
12  * the GNU public licence.  See http://www.opensource.org for details.
13  */
14
15
16 #define EXTERN extern
17
18 #include "puzzle.h"
19 #include <string.h>
20
21 #if PARALLEL
22 #       include "sched.h"
23 #endif /* PARALLEL */
24
25
26 /******************************************************************************/
27 /* sequences                                                                  */
28 /******************************************************************************/
29
30 /* read ten characters of current line as identifier */
31 void readid(FILE *infp, int t)
32 {
33         int i, j, flag, ci;
34
35         for (i = 0; i < 26; i++) { /*CZ*/
36                 ci = fgetc(infp);
37                 if (ci == EOF || !isprint(ci)) {
38                         FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (no name for sequence %d)\n\n\n", t+1);
39                         exit(1);
40                 }
41                 Identif[t][i] = (char) ci;
42         }       
43         /* convert leading blanks in taxon name to underscores */
44         flag = FALSE;
45         for (i = 25; i > -1; i--) { /*CZ*/
46                 if (flag == FALSE) {
47                         if (Identif[t][i] != ' ') flag = TRUE; 
48                 } else {
49                         if (Identif[t][i] == ' ') Identif[t][i] = '_';
50                 }
51         }
52         /* check whether this name is already used */
53         for (i = 0; i < t; i++) { /* compare with all other taxa */
54                 flag = TRUE; /* assume identity */
55                 for (j = 0; (j < 26) && (flag == TRUE); j++) /*CZ*/
56                         if (Identif[t][j] != Identif[i][j])
57                                 flag = FALSE;
58                 if (flag) {
59                         FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (multiple occurence of sequence name '");
60                         fputid(STDOUT, t);
61                         FPRINTF(STDOUTFILE "')\n\n\n");
62                         exit(1);
63                 }
64         }
65 }
66
67 /* read next allowed character */
68 char readnextcharacter(FILE *ifp, int notu, int nsite)
69 {
70         char c;
71
72         /* ignore blanks and control characters except newline */
73         do {
74                 if (fscanf(ifp, "%c", &c) != 1) {
75                         FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (missing character at position %d in sequence '", nsite + 1);
76                         fputid(STDOUT, notu);
77                         FPRINTF(STDOUTFILE "')\n\n\n");
78                         exit(1);
79                 }
80         } while (c == ' ' || (iscntrl((int) c) && c != '\n'));
81         return c;
82 }
83
84 /* skip rest of the line */
85 void skiprestofline(FILE* ifp, int notu, int nsite)
86 {
87         int ci;
88
89         /* read chars until the first newline */
90         do{
91                 ci = fgetc(ifp);
92                 if (ci == EOF) {
93                         FPRINTF(STDOUTFILE "Unable to proceed (missing newline at position %d in sequence '", nsite + 1);
94                         fputid(STDOUT, notu);
95                         FPRINTF(STDOUTFILE "')\n\n\n");
96                         exit(1);
97                 }
98         } while ((char) ci != '\n');
99 }
100
101 /* skip control characters and blanks */
102 void skipcntrl(FILE *ifp, int notu, int nsite)
103 {
104         int ci;
105
106         /* read over all control characters and blanks */
107         do {
108                 ci = fgetc(ifp);
109                 if (ci == EOF) {
110                         FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (missing character at position %d in sequence '", nsite + 1);
111                         fputid(STDOUT, notu);
112                         FPRINTF(STDOUTFILE "')\n\n\n");
113                         exit(1);
114                 }
115         } while (iscntrl(ci) || (char) ci == ' ');
116         /* go one character back */
117         if (ungetc(ci, ifp) == EOF) {
118                 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (positioning error at position %d in sequence '", nsite + 1);
119                 fputid(STDOUT, notu);
120                 FPRINTF(STDOUTFILE "')\n\n\n");
121                 exit(1);
122         }
123 }
124
125 /* read sequences of one data set */
126 void getseqs(FILE *ifp)
127 {
128         int notu, nsite, endofline, linelength, i;
129         char c;
130         
131         seqchars = new_cmatrix(Maxspc, Maxseqc);
132         /* read all characters */
133         nsite = 0; /* next site to be read */
134         while (nsite < Maxseqc) {
135                 /* read first taxon */
136                 notu = 0;
137                 /* go to next true line */
138                 skiprestofline(ifp, notu, nsite);
139                 skipcntrl(ifp, notu, nsite);
140                 if (nsite == 0) readid(ifp, notu);
141                 endofline = FALSE;
142                 linelength = 0;         
143                 do {
144                         c = readnextcharacter(ifp, notu, nsite + linelength);
145                         if (c == '\n') endofline = TRUE;
146                         else if (c == '.') {
147                                 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (invalid character '.' at position ");
148                                 FPRINTF(STDOUTFILE "%d in first sequence)\n\n\n", nsite + linelength + 1);
149                                 exit(1);
150                         } else if (nsite + linelength < Maxseqc) {
151                                 /* change to upper case */
152                                 seqchars[notu][nsite + linelength] = (char) toupper((int) c);
153                                 linelength++;
154                         } else {
155                                 endofline = TRUE;
156                                 skiprestofline(ifp, notu, nsite + linelength);
157                         }
158                 } while (!endofline);   
159                 if (linelength == 0) {
160                         FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (line with length 0 at position %d in sequence '", nsite + 1);
161                         fputid(STDOUT, notu);
162                         FPRINTF(STDOUTFILE "')\n\n\n");
163                         exit(1);
164                 }
165                 /* read other taxa */
166                 for (notu = 1; notu < Maxspc; notu++) {
167                         /* go to next true line */
168                         if (notu != 1) skiprestofline(ifp, notu, nsite);
169                         skipcntrl(ifp, notu, nsite);
170                         if (nsite == 0) readid(ifp, notu);
171                         for (i = nsite; i < nsite + linelength; i++) {
172                                 c = readnextcharacter(ifp, notu, i);
173                                 if (c == '\n') { /* too short */
174                                         FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (line to short at position %d in sequence '", i + 1);
175                                         fputid(STDOUT, notu);
176                                         FPRINTF(STDOUTFILE "')\n\n\n");
177                                         exit(1);
178                                 } else if (c == '.') {
179                                         seqchars[notu][i] = seqchars[0][i];
180                                 } else {
181                                         /* change to upper case */
182                                         seqchars[notu][i] = (char) toupper((int) c);
183                                 }
184                         }
185                 }
186                 nsite = nsite + linelength;
187         }
188 }
189
190 /* initialize identifer array */
191 void initid(int t)
192 {
193         int i, j;
194
195         Identif = new_cmatrix(t, 26); /*CZ*/
196         for (i = 0; i < t; i++)
197                 for (j = 0; j < 26; j++) /*CZ*/
198                         Identif[i][j] = ' ';
199 }
200
201 /* print identifier of specified taxon in full 10 char length */
202 void fputid10(FILE *ofp, int t)
203 {       
204         int i;
205
206         for (i = 0; i < 26; i++) fputc(Identif[t][i], ofp); /*CZ*/
207 }
208
209 /* print identifier of specified taxon up to first space */
210 int fputid(FILE *ofp, int t)
211 {       
212         int i;
213         
214         i = 0;
215         while (Identif[t][i] != ' ' && i < 26) {   /*CZ*/
216                 fputc(Identif[t][i], ofp);
217                 i++;
218         }
219         return i;
220 }
221
222 /* read first line of sequence data set */
223 void getsizesites(FILE *ifp)
224 {
225         if (fscanf(ifp, "%d", &Maxspc) != 1) {
226                         FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (missing number of sequences)\n\n\n");
227                         exit(1);
228         }
229         if (fscanf(ifp, "%d", &Maxseqc) != 1) {
230                         FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (missing number of sites)\n\n\n");
231                         exit(1);
232         }
233         
234         if (Maxspc < 4) {
235                 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (less than 4 sequences)\n\n\n");
236                 exit(1);
237         }
238         if (Maxspc > 8000) {  /*CZ*/
239                 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (more than 8000 sequences)\n\n\n");
240                 exit(1);
241         }
242         if (Maxseqc < 1) {
243                 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (no sequence sites)\n\n\n");
244                 exit(1);
245         }
246         Maxbrnch = 2*Maxspc - 3;
247 }
248
249 /* read one data set - PHYLIP interleaved */
250 void getdataset(FILE *ifp)
251 {
252         initid(Maxspc);
253         getseqs(ifp);
254 }
255
256 /* guess data type */
257 int guessdatatype()
258 {
259         uli numnucs, numchars, numbins;
260         int notu, nsite;
261         char c;
262         
263         /* count A, C, G, T, U, N */
264         numnucs = 0;
265         numchars = 0;
266         numbins = 0;
267         for (notu = 0; notu < Maxspc; notu++)
268                 for (nsite = 0; nsite < Maxseqc; nsite++) {
269                         c = seqchars[notu][nsite];
270                         if (c == 'A' || c == 'C' || c == 'G' ||
271                             c == 'T' || c == 'U' || c == 'N') numnucs++;
272                         if (c != '-' && c != '?') numchars++;
273                         if (c == '0' || c == '1') numbins++;
274                 }
275         if (numchars == 0) numchars = 1;
276         /* more than 85 % frequency means nucleotide data */
277         if ((double) numnucs / (double) numchars > 0.85) return 0;
278         else if ((double) numbins / (double) numchars > 0.2) return 2; 
279         else return 1;
280 }
281
282 /* translate characters into format used by ML engine */
283 void translatedataset()
284 {       
285         int notu, sn, co;
286         char c;
287         cvector code;
288         
289
290         /* determine Maxsite - number of ML sites per taxon */
291         if (data_optn == 0 && SH_optn) {
292                 if (SHcodon)
293                         Maxsite = Maxseqc / 3;
294                 else
295                         Maxsite = Maxseqc / 2; /* assume doublets */
296                 
297         } else
298                 Maxsite = Maxseqc;
299         if (data_optn == 0 && (Maxsite % 3) == 0  && !SH_optn) {        
300                 if (codon_optn == 1 || codon_optn == 2 || codon_optn == 3)
301                         Maxsite = Maxsite / 3; /* only one of the three codon positions */
302                 if (codon_optn == 4)
303                         Maxsite = 2*(Maxsite / 3); /* 1st + 2nd codon positions */
304         }
305         
306         /* reserve memory */
307         if (Seqchar != NULL) free_cmatrix(Seqchar);
308         Seqchar = new_cmatrix(Maxspc, Maxsite);
309
310         /* code length */
311         if (data_optn == 0 && SH_optn)
312                 code = new_cvector(2);
313         else
314                 code = new_cvector(1);
315         
316         /* decode characters */
317         if (data_optn == 0 && SH_optn) { /* SH doublets */
318                 
319                 for (notu = 0; notu < Maxspc; notu++) {
320                         for (sn = 0; sn < Maxsite; sn++) {
321                                         for (co = 0; co < 2; co++) {
322                                                 if (SHcodon)
323                                                         c = seqchars[notu][sn*3 + co];
324                                                 else
325                                                         c = seqchars[notu][sn*2 + co];
326                                                 code[co] = c;
327                                         }
328                                 Seqchar[notu][sn] = code2int(code);
329                         }
330                 }
331                 
332         } else if (!(data_optn == 0 && (Maxseqc % 3) == 0)) { /* use all */
333
334                 for (notu = 0; notu < Maxspc; notu++) {
335                         for (sn = 0; sn < Maxsite; sn++) {
336                                 code[0] = seqchars[notu][sn];
337                                 Seqchar[notu][sn] = code2int(code);
338                         }
339                 }
340
341         } else { /* codons */
342                 
343                 for (notu = 0; notu < Maxspc; notu++) {
344                         for (sn = 0; sn < Maxsite; sn++) {
345                                 if (codon_optn == 1 || codon_optn == 2 || codon_optn == 3)
346                                         code[0] = seqchars[notu][sn*3+codon_optn-1];
347                                 else if (codon_optn == 4) {
348                                         if ((sn % 2) == 0)
349                                                 code[0] = seqchars[notu][(sn/2)*3];
350                                         else
351                                                 code[0] = seqchars[notu][((sn-1)/2)*3+1];
352                                 } else
353                                         code[0] = seqchars[notu][sn];
354                                 Seqchar[notu][sn] = code2int(code);
355                         }
356                 }
357         
358         }
359         free_cvector(code);
360 }
361
362 /* estimate mean base frequencies from translated data set */
363 void estimatebasefreqs()
364 {
365         int tpmradix, i, j;
366         uli all, *gene;
367         
368         tpmradix = gettpmradix();
369         
370         if (Freqtpm != NULL) free_dvector(Freqtpm);
371         Freqtpm = new_dvector(tpmradix);
372         
373         if (Basecomp != NULL) free_imatrix(Basecomp);
374         Basecomp = new_imatrix(Maxspc, tpmradix);
375         
376         gene = (uli *) malloc((unsigned) ((tpmradix + 1) * sizeof(uli)));
377         if (gene == NULL) maerror("gene in estimatebasefreqs");
378         
379         for (i = 0; i < tpmradix + 1; i++) gene[i] = 0;
380         for (i = 0; i < Maxspc; i++)
381                 for (j = 0; j < tpmradix; j++) Basecomp[i][j] = 0;
382         for (i = 0; i < Maxspc; i++)
383                 for (j = 0; j < Maxsite; j++) {
384                         gene[(int) Seqchar[i][j]]++;
385                         if (Seqchar[i][j] != tpmradix) Basecomp[i][(int) Seqchar[i][j]]++;
386                         }
387
388         all = Maxspc * Maxsite - gene[tpmradix];
389         if (all != 0) { /* normal case */
390                 for (i = 0; i < tpmradix; i++)
391                         Freqtpm[i] = (double) gene[i] / (double) all;
392         } else { /* pathological case with no unique character in data set */
393                 for (i = 0; i < tpmradix; i++)
394                         Freqtpm[i] = 1.0 / (double) tpmradix;
395         }
396         
397         free(gene);
398         
399         Frequ_optn = TRUE;
400 }
401
402 /* guess model of substitution */
403 void guessmodel()
404 {
405         double c1, c2, c3, c4, c5, c6;
406         dvector f;
407         dmatrix a;
408         int i;
409
410         Dayhf_optn = FALSE;
411         Jtt_optn = TRUE;
412         mtrev_optn = FALSE;
413         cprev_optn = FALSE;
414         blosum62_optn = FALSE;
415         vtmv_optn = FALSE;
416         wag_optn = FALSE;
417         TSparam = 2.0;
418         YRparam = 1.0;
419         optim_optn = TRUE;
420         HKY_optn = TRUE;
421         TN_optn = FALSE;
422         
423         if (data_optn == 1) { /* amino acids */
424                 
425                 /* chi2 fit to amino acid frequencies */
426                 
427                 f = new_dvector(20);
428                 a = new_dmatrix(20,20);
429                 /* chi2 distance Dayhoff */
430                 dyhfdata(a, f);
431                 c1 = 0;
432                 for (i = 0; i < 20; i++)
433                         c1 = c1 + (Freqtpm[i]-f[i])*(Freqtpm[i]-f[i]);
434                 /* chi2 distance JTT */
435                 jttdata(a, f);
436                 c2 = 0;
437                 for (i = 0; i < 20; i++)
438                         c2 = c2 + (Freqtpm[i]-f[i])*(Freqtpm[i]-f[i]);
439                 /* chi2 distance mtREV */
440                 mtrevdata(a, f);
441                 c3 = 0;
442                 for (i = 0; i < 20; i++)
443                         c3 = c3 + (Freqtpm[i]-f[i])*(Freqtpm[i]-f[i]);
444                 /* chi2 distance VT */
445                 vtmvdata(a, f);
446                 c4 = 0;
447                 for (i = 0; i < 20; i++)
448                         c4 = c4 + (Freqtpm[i]-f[i])*(Freqtpm[i]-f[i]);
449                 /* chi2 distance WAG */
450                 wagdata(a, f);
451                 c5 = 0;
452                 for (i = 0; i < 20; i++)
453                         c5 = c5 + (Freqtpm[i]-f[i])*(Freqtpm[i]-f[i]);
454                 /* chi2 distance cpREV */
455                 cprev45data(a, f);
456                 c6 = 0;
457                 for (i = 0; i < 20; i++)
458                         c6 = c6 + (Freqtpm[i]-f[i])*(Freqtpm[i]-f[i]);
459
460                 free_dvector(f);
461                 free_dmatrix(a);
462
463 #ifndef CPREV
464                 if ((c1 < c2) && (c1 < c3) && (c1 < c4) && (c1 < c5)) {
465                         /* c1 -> Dayhoff */
466                         Dayhf_optn = TRUE;
467                         Jtt_optn = FALSE;
468                         mtrev_optn = FALSE;
469                         cprev_optn = FALSE;
470                         vtmv_optn = FALSE;
471                         wag_optn = FALSE;
472                         FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on nuclear DNA)\n");
473                 } else {
474                         if ((c2 < c3) && (c2 < c4) && (c2 < c5)) {
475                                 /* c2 -> JTT */
476                                 Dayhf_optn = FALSE;
477                                 Jtt_optn = TRUE;
478                                 mtrev_optn = FALSE;
479                                 cprev_optn = FALSE;
480                                 vtmv_optn = FALSE;
481                                 wag_optn = FALSE;
482                                 FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on nuclear DNA)\n");
483                         } else {
484                                 if ((c3 < c4) && (c3 < c5)) {
485                                         /* c3 -> mtREV */
486                                         Dayhf_optn = FALSE;
487                                         Jtt_optn = FALSE;
488                                         mtrev_optn = TRUE;
489                                         cprev_optn = FALSE;
490                                         vtmv_optn = FALSE;
491                                         wag_optn = FALSE;
492                                         FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on mtDNA)\n");
493                                 } else {
494                                         if ((c4 < c5)) {
495                                                 /* c4 -> VT */
496                                                 Dayhf_optn = FALSE;
497                                                 Jtt_optn = FALSE;
498                                                 mtrev_optn = FALSE;
499                                                 cprev_optn = FALSE;
500                                                 vtmv_optn = TRUE;
501                                                 wag_optn = FALSE;
502                                                 FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on nuclear DNA)\n");
503                                         } else {
504                                                         /* c5 -> WAG */
505                                                         Dayhf_optn = FALSE;
506                                                         Jtt_optn = FALSE;
507                                                         mtrev_optn = FALSE;
508                                                         cprev_optn = FALSE;
509                                                         vtmv_optn = FALSE;
510                                                         wag_optn = TRUE;
511                                                         FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on nuclear DNA)\n");
512                                         } /* if c4 else c5 */
513                                 } /* if c3 else c4 */
514                         } /* if c2 */
515                 } /* if c1 */
516
517 #else /* CPREV */
518
519                 if ((c1 < c2) && (c1 < c3) && (c1 < c4) && (c1 < c5) && (c1 < c6)) {
520                         /* c1 -> Dayhoff */
521                         Dayhf_optn = TRUE;
522                         Jtt_optn = FALSE;
523                         mtrev_optn = FALSE;
524                         cprev_optn = FALSE;
525                         vtmv_optn = FALSE;
526                         wag_optn = FALSE;
527                         FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on nuclear DNA)\n");
528                 } else {
529                         if ((c2 < c3) && (c2 < c4) && (c2 < c5) && (c2 < c6)) {
530                                 /* c2 -> JTT */
531                                 Dayhf_optn = FALSE;
532                                 Jtt_optn = TRUE;
533                                 mtrev_optn = FALSE;
534                                 cprev_optn = FALSE;
535                                 vtmv_optn = FALSE;
536                                 wag_optn = FALSE;
537                                 FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on nuclear DNA)\n");
538                         } else {
539                                 if ((c3 < c4) && (c3 < c5) && (c3 < c6)) {
540                                         /* c3 -> mtREV */
541                                         Dayhf_optn = FALSE;
542                                         Jtt_optn = FALSE;
543                                         mtrev_optn = TRUE;
544                                         cprev_optn = FALSE;
545                                         vtmv_optn = FALSE;
546                                         wag_optn = FALSE;
547                                         FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on mtDNA)\n");
548                                 } else {
549                                         if ((c4 < c5) && (c4 < c6)) {
550                                                 /* c4 -> VT */
551                                                 Dayhf_optn = FALSE;
552                                                 Jtt_optn = FALSE;
553                                                 mtrev_optn = FALSE;
554                                                 cprev_optn = FALSE;
555                                                 vtmv_optn = TRUE;
556                                                 wag_optn = FALSE;
557                                                 FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on nuclear DNA)\n");
558                                         } else {
559                                                 if (c5 < c6) {
560                                                         /* c5 -> WAG */
561                                                         Dayhf_optn = FALSE;
562                                                         Jtt_optn = FALSE;
563                                                         mtrev_optn = FALSE;
564                                                         cprev_optn = FALSE;
565                                                         vtmv_optn = FALSE;
566                                                         wag_optn = TRUE;
567                                                         FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on nuclear DNA)\n");
568                                                 } else {
569                                                         /* if (c6) */
570                                                         /* c6 -> cpREV */
571                                                         Dayhf_optn = FALSE;
572                                                         Jtt_optn = FALSE;
573                                                         mtrev_optn = FALSE;
574                                                         cprev_optn = TRUE;
575                                                         vtmv_optn = FALSE;
576                                                         wag_optn = FALSE;
577                                                         FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on cpDNA)\n");
578                                                 } /* if c5 else c6 */
579                                         } /* if c4 else c5 */
580                                 } /* if c3 else c4 */
581                         } /* if c2 */
582                 } /* if c1 */
583 #endif /* CPREV */
584
585         } else if (data_optn == 0) {
586                 FPRINTF(STDOUTFILE "(consists very likely of nucleotides)\n");
587         } else {
588                 FPRINTF(STDOUTFILE "(consists very likely of binary state data)\n");
589         }
590 } /* guessmodel */
591
592
593 /******************************************************************************/
594 /* functions for representing and building puzzling step trees                */
595 /******************************************************************************/
596
597 /* initialize tree with the following starting configuration
598
599                  2
600           0  +------- C(=2)
601   A(=0) -----+
602              +------- B(=1)
603                  1
604                                 */
605 void inittree()
606 {
607         int i;
608
609         /* allocate the memory for the whole tree */
610         
611         /* allocate memory for vector with all the edges of the tree */
612         edge = (ONEEDGE *) calloc(Maxbrnch, sizeof(ONEEDGE) );
613         if (edge == NULL) maerror("edge in inittree");
614
615         /* allocate memory for vector with edge numbers of leaves */ 
616         edgeofleaf = (int *) calloc(Maxspc, sizeof(int) );
617         if (edgeofleaf == NULL) maerror("edgeofleaf in inittree");
618         
619         /* allocate memory for all the edges the edge map */
620         for (i = 0; i < Maxbrnch; i++) {
621                 edge[i].edgemap = (int *) calloc(Maxbrnch, sizeof(int) );
622                 if (edge[i].edgemap == NULL) maerror("edgemap in inittree");
623         }
624                 
625         /* number all edges */
626         for (i = 0; i < Maxbrnch; i++) edge[i].numedge = i;
627         
628         /* initialize tree */
629         
630         nextedge = 3;
631         nextleaf = 3;
632         
633         /* edge maps */
634         (edge[0].edgemap)[0] = 0; /* you are on the right edge */
635         (edge[0].edgemap)[1] = 4; /* go down left for leaf 1 */
636         (edge[0].edgemap)[2] = 5; /* go down right for leaf 2 */
637         (edge[1].edgemap)[0] = 1; /* go up for leaf 0 */
638         (edge[1].edgemap)[1] = 0; /* you are on the right edge */
639         (edge[1].edgemap)[2] = 3; /* go up/down right for leaf 2 */
640         (edge[2].edgemap)[0] = 1; /* go up for leaf 0 */
641         (edge[2].edgemap)[1] = 2; /* go up/down left for leaf 1 */
642         (edge[2].edgemap)[2] = 0; /* you are on the right edge */
643         
644         /* interconnection */
645         edge[0].up = NULL;
646         edge[0].downleft = &edge[1];
647         edge[0].downright = &edge[2];
648         edge[1].up = &edge[0];
649         edge[1].downleft = NULL;
650         edge[1].downright = NULL;
651         edge[2].up = &edge[0];
652         edge[2].downleft = NULL;
653         edge[2].downright = NULL;
654         
655         /* edges of leaves */
656         edgeofleaf[0] = 0;
657         edgeofleaf[1] = 1;
658         edgeofleaf[2] = 2;
659 } /* inittree */
660
661 /* add next leaf on the specified edge */
662 void addnextleaf(int dockedge)
663 {       
664         int i;
665
666         if (dockedge >= nextedge) {
667                 /* Trying to add leaf nextleaf to nonexisting edge dockedge */
668                 FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR F TO DEVELOPERS\n\n\n");
669                 exit(1);
670         }
671         
672         if (nextleaf >= Maxspc) {
673                 /* Trying to add leaf nextleaf to a tree with Maxspc leaves */
674                 FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR G TO DEVELOPERS\n\n\n");
675                 exit(1);
676         }
677                 
678         /* necessary change in edgeofleaf if dockedge == edgeofleaf[0] */
679         if (edgeofleaf[0] == dockedge) edgeofleaf[0] = nextedge;
680         
681         /* adding nextedge to the tree */
682         edge[nextedge].up = edge[dockedge].up;
683         edge[nextedge].downleft = &edge[dockedge];
684         edge[nextedge].downright = &edge[nextedge+1];
685         edge[dockedge].up = &edge[nextedge];
686
687         if (edge[nextedge].up != NULL) {
688                 if ( ((edge[nextedge].up)->downleft) == &edge[dockedge] ) 
689                         (edge[nextedge].up)->downleft  = &edge[nextedge];
690                 else   
691                         (edge[nextedge].up)->downright = &edge[nextedge];
692         }
693
694         /* adding nextedge + 1 to the tree */
695         edge[nextedge+1].up = &edge[nextedge];
696         edge[nextedge+1].downleft = NULL;
697         edge[nextedge+1].downright = NULL;
698         edgeofleaf[nextleaf] = nextedge+1;
699         
700         /* the two new edges get info about the old edges */
701         /* nextedge */
702         for (i = 0; i < nextedge; i++) {
703                 switch ( (edge[dockedge].edgemap)[i] ) {
704                         
705                         /* down right changes to down left */
706                         case 5:         (edge[nextedge].edgemap)[i] = 4;
707                                                 break;
708                                                 
709                         /* null changes to down left */
710                         case 0:         (edge[nextedge].edgemap)[i] = 4;
711                                                 break;
712                         
713                         default:        (edge[nextedge].edgemap)[i] =
714                                                         (edge[dockedge].edgemap)[i];
715                                                 break;
716                 }
717         }
718
719         /* nextedge + 1 */
720         for (i = 0; i < nextedge; i++) {
721                 switch ( (edge[dockedge].edgemap)[i] ) {
722                         
723                         /* up/down left changes to up */
724                         case 2:         (edge[nextedge+1].edgemap)[i] = 1;
725                                                 break;
726                         
727                         /* up/down right changes to up */               
728                         case 3:         (edge[nextedge+1].edgemap)[i] = 1;
729                                                 break;
730                                                 
731                         /* down left changes to up/down left */                 
732                         case 4:         (edge[nextedge+1].edgemap)[i] = 2;
733                                                 break;
734
735                         /* down right changes to up/down left */        
736                         case 5:         (edge[nextedge+1].edgemap)[i] = 2;
737                                                 break;
738                         
739                         /* null changes to up/down left */
740                         case 0:         (edge[nextedge+1].edgemap)[i] = 2;
741                                                 break;
742                         
743                         /* up stays up */
744                         default:        (edge[nextedge+1].edgemap)[i] =
745                                                         (edge[dockedge].edgemap)[i];
746                                                 break;
747                 }
748         }
749
750         /* dockedge */
751         for (i = 0; i < nextedge; i++) {
752                 switch ( (edge[dockedge].edgemap)[i] ) {
753                         
754                         /* up/down right changes to up */
755                         case 3:         (edge[dockedge].edgemap)[i] = 1;
756                                                 break;
757                                                 
758                         /* up/down left changes to up */
759                         case 2:         (edge[dockedge].edgemap)[i] = 1;
760                                                 break;
761                         
762                         default:        break;
763                 }
764         }
765
766         /* all edgemaps are updated for the two new edges */
767         /* nextedge */
768         (edge[nextedge].edgemap)[nextedge] = 0;
769         (edge[nextedge].edgemap)[nextedge+1] = 5; /* down right */
770         
771         /* nextedge + 1 */
772         (edge[nextedge+1].edgemap)[nextedge] = 1; /* up */
773         (edge[nextedge+1].edgemap)[nextedge+1] = 0;
774         
775         /* all other edges */
776         for (i = 0; i < nextedge; i++) {
777                 (edge[i].edgemap)[nextedge] = (edge[i].edgemap)[dockedge];
778                 (edge[i].edgemap)[nextedge+1] = (edge[i].edgemap)[dockedge];
779         }
780         
781         /* an extra for dockedge */
782         (edge[dockedge].edgemap)[nextedge] = 1; /* up */
783         (edge[dockedge].edgemap)[nextedge+1] = 3; /* up/down right */
784
785         nextleaf++;
786         nextedge = nextedge + 2;
787 } /* addnextleaf */
788
789
790 /* free memory (to be called after inittree) */
791 void freetree()
792 {
793         int i;
794
795         for (i = 0; i < 2 * Maxspc - 3; i++) free(edge[i].edgemap);
796         free(edge);
797         free(edgeofleaf);       
798 } /* freetree */
799
800 /* writes OTU sitting on edge ed */
801 void writeOTU(FILE *outfp, int ed)
802 {       
803         int i;
804
805         /* test whether we are on a leaf */
806         if (edge[ed].downright == NULL && edge[ed].downleft == NULL) {
807                 for (i = 1; i < nextleaf; i++) {
808                         if (edgeofleaf[i] == ed) { /* i is the leaf of ed */
809                                 column += fputid(outfp, trueID[i]);
810                                 return;
811                         }
812                 }
813         }
814
815         /* we are NOT on a leaf */
816         fprintf(outfp, "(");
817         column++;
818         writeOTU(outfp, edge[ed].downleft->numedge);
819         fprintf(outfp, ",");
820         column++;
821         column++;
822         if (column > 55) {
823                 column = 2;
824                 fprintf(outfp, "\n  ");
825         }
826         writeOTU(outfp, edge[ed].downright->numedge);   
827         fprintf(outfp, ")");
828         column++;
829 } /* writeOTU */
830
831 /* write tree */
832 void writetree(FILE *outfp)
833 {       
834         column = 1;
835         fprintf(outfp, "(");
836         column += fputid(outfp, trueID[0]) + 3;
837         fprintf(outfp, ",");
838         writeOTU(outfp, edge[edgeofleaf[0]].downleft->numedge);
839         column++;
840         column++;
841         fprintf(outfp, ",");
842         writeOTU(outfp, edge[edgeofleaf[0]].downright->numedge);        
843         fprintf(outfp, ");\n"); 
844 } /* writetree */
845
846
847 /* clear all edgeinfos */
848 void resetedgeinfo()
849 {
850         int i;
851         
852         for (i = 0; i < nextedge; i++)
853                 edge[i].edgeinfo = 0;
854 } /* resetedgeinfo */
855
856 /* increment all edgeinfo between leaf A and B */
857 void incrementedgeinfo(int A, int B)
858 {       
859         int curredge, finaledge, nextstep;
860         
861         if (A == B) return;
862         
863         finaledge = edgeofleaf[B];
864         
865         curredge = edgeofleaf[A];
866         edge[curredge].edgeinfo = edge[curredge].edgeinfo + 1;
867         
868         while (curredge != finaledge) {
869                 nextstep = (edge[curredge].edgemap)[finaledge];
870                 switch (nextstep) {
871
872                         /* up */
873                         case 1: curredge = (edge[curredge].up)->numedge;
874                                         break;
875                                 
876                         /* up/down left */
877                         case 2: curredge = ((edge[curredge].up)->downleft)->numedge;
878                                         break;
879
880                         /* up/down right */
881                         case 3: curredge = ((edge[curredge].up)->downright)->numedge;
882                                         break;
883                                 
884                         /* down left */
885                         case 4: curredge = (edge[curredge].downleft)->numedge;
886                                         break;
887                                 
888                         /* down right */
889                         case 5: curredge = (edge[curredge].downright)->numedge;
890                                         break;
891                                 
892                 }
893                 edge[curredge].edgeinfo = edge[curredge].edgeinfo + 1;
894         }       
895 } /* incrementedgeinfo */
896
897 /* checks which edge has the lowest edgeinfo
898    if there are several edges with the same lowest edgeinfo,
899    one of them will be selected randomly */
900 void minimumedgeinfo()
901 {
902         int i, k, howmany, randomnum;
903
904         howmany = 1;
905         minedge = 0;
906         mininfo = edge[0].edgeinfo;
907         for (i = 1; i < nextedge; i++)
908                 if (edge[i].edgeinfo <= mininfo) {
909                         if (edge[i].edgeinfo == mininfo) {
910                                 howmany++;
911                         } else {
912                                 minedge = i;
913                                 mininfo = edge[i].edgeinfo;
914                                 howmany = 1;
915                         }
916                 }
917         
918         if (howmany > 1) { /* draw random edge */
919                 randomnum = randominteger(howmany) + 1; /* 1 to howmany */
920                 i = -1;
921                 for (k = 0; k < randomnum; k++) {
922                         do {
923                                 i++;
924                         } while (edge[i].edgeinfo != mininfo);
925                         minedge = i;
926                 }
927         }
928 } /* minimumedgeinfo */
929
930
931
932
933 /*******************************************/
934 /* tree sorting                            */
935 /*******************************************/
936
937 /* compute address of the 4 int (sort key) in the 4 int node */
938 int ct_sortkeyaddr(int addr)
939 {
940   int a, res;
941   a = addr % 4;
942   res = addr - a + 3;
943   return res;
944 }
945
946
947 /**********/
948
949 /* compute address of the next edge pointer in a 4 int node (0->1->2->0) */
950 int ct_nextedgeaddr(int addr)
951 {
952   int a, res;
953   a = addr % 4;
954   if ( a == 2 ) { res = addr - 2; }
955   else          { res = addr + 1; }
956   return res;
957 }
958
959
960 /**********/
961
962 /* compute address of 1st edge of a 4 int node from node number */
963 int ct_1stedge(int node)
964 {
965   int res;
966   res = 4 * node;
967   return res;
968 }
969
970
971 /**********/
972
973 /* compute address of 2nd edge of a 4 int node from node number */
974 int ct_2ndedge(int node)
975 {
976   int res;
977   res = 4 * node +1;
978   return res;
979 }
980
981
982 /**********/
983
984 /* compute address of 3rd edge of a 4 int node from node number */
985 int ct_3rdedge(int node)
986 {
987   int res;
988   res = 4 * node +2;
989   return res;
990 }
991
992
993 /**********/
994
995 /* check whether node 'node' is a leaf (2nd/3rd edge pointer = -1) */
996 int ct_isleaf(int node, int *ctree)
997 {
998   return (ctree[ct_3rdedge(node)] < 0);
999 }
1000
1001
1002 /**********/
1003
1004 /* compute node number of 4 int node from an edge addr. */
1005 int ct_addr2node(int addr)
1006 {
1007   int a, res;
1008   a = addr % 4;
1009   res = (int) ((addr - a) / 4);
1010   return res;
1011 }
1012
1013
1014 /**********/
1015
1016 /* print graph pointers for checking */
1017 void printctree(int *ctree)
1018 {
1019         int n;
1020         for (n=0; n < 2*Maxspc; n++) {
1021                 printf("n[%3d] = (%3d.%2d, %3d.%2d, %3d.%2d | %3d)\n", n,
1022                 (int) ctree[ct_1stedge(n)]/4,
1023                 (int) ctree[ct_1stedge(n)]%4,
1024                 (int) ctree[ct_2ndedge(n)]/4,
1025                 (int) ctree[ct_2ndedge(n)]%4,
1026                 (int) ctree[ct_3rdedge(n)]/4,
1027                 (int) ctree[ct_3rdedge(n)]%4,
1028                 ctree[ct_3rdedge(n)+1]);
1029         }
1030         printf("\n");
1031 } /* printctree */
1032
1033
1034 /**********/
1035
1036 /* allocate memory for ctree 3 ints pointer plus 1 check byte */
1037 int *initctree()
1038 {
1039   int *snodes;
1040   int n;
1041
1042   snodes = (int *) malloc(4 * 2 * Maxspc * sizeof(int));
1043   if (snodes == NULL) maerror("snodes in copytree");
1044
1045   for (n=0; n<(4 * 2 * Maxspc); n++) {
1046       snodes[n]=-1;
1047   }
1048   return snodes;
1049 }
1050
1051
1052 /**********/
1053
1054 /* free memory of a tree for sorting */
1055 void freectree(int **snodes)
1056 {
1057         free(*snodes);
1058         *snodes = NULL;
1059 }
1060
1061
1062 /**********/
1063
1064 /* copy subtree recursively */
1065 void copyOTU(int *ctree,                  /* tree array struct            */
1066              int *ct_nextnode,            /* next free node               */
1067              int ct_curredge,             /* currende edge to add subtree */
1068              int *ct_nextleaf,            /* next free leaf (0-maxspc)    */
1069              int ed)                      /* edge in puzzling step tree   */
1070 {
1071         int i, nextcurredge;
1072
1073         /* test whether we are on a leaf */
1074         if (edge[ed].downright == NULL && edge[ed].downleft == NULL) {
1075                 for (i = 1; i < nextleaf; i++) {
1076                         if (edgeofleaf[i] == ed) { /* i is the leaf of ed */
1077                                 nextcurredge          = ct_1stedge(*ct_nextleaf);
1078                                 ctree[ct_curredge]    = nextcurredge;
1079                                 ctree[nextcurredge]   = ct_curredge;
1080                                 ctree[ct_sortkeyaddr(nextcurredge)] = trueID[i];
1081                                 (*ct_nextleaf)++;
1082                                 return;
1083                         }
1084                 }
1085         }
1086
1087         /* we are NOT on a leaf */
1088         nextcurredge        = ct_1stedge(*ct_nextnode);
1089         ctree[ct_curredge]     = nextcurredge;
1090         ctree[nextcurredge] = ct_curredge;
1091         (*ct_nextnode)++;
1092         nextcurredge = ct_nextedgeaddr(nextcurredge);
1093         copyOTU(ctree, ct_nextnode, nextcurredge, 
1094                 ct_nextleaf, edge[ed].downleft->numedge);
1095
1096         nextcurredge = ct_nextedgeaddr(nextcurredge);
1097         copyOTU(ctree, ct_nextnode, nextcurredge, 
1098                 ct_nextleaf, edge[ed].downright->numedge);
1099 }
1100
1101
1102 /**********/
1103
1104 /* copy treestructure to sorting structure */
1105 void copytree(int *ctree)
1106 {
1107         int ct_curredge;
1108         int ct_nextleaf;
1109         int ct_nextnode;
1110
1111         ct_nextnode = Maxspc;
1112         ct_curredge = ct_1stedge(ct_nextnode);
1113         ct_nextleaf = 1;
1114
1115         ctree[ct_1stedge(0)] = ct_curredge;
1116         ctree[ct_curredge]   = ct_1stedge(0);
1117         ctree[ct_sortkeyaddr(0)] = trueID[0];
1118
1119         ct_nextnode++;
1120         
1121         ct_curredge = ct_nextedgeaddr(ct_curredge);
1122         copyOTU(ctree, &ct_nextnode, ct_curredge, 
1123                 &ct_nextleaf, edge[edgeofleaf[0]].downleft->numedge);
1124
1125         ct_curredge = ct_nextedgeaddr(ct_curredge);
1126         copyOTU(ctree, &ct_nextnode, ct_curredge, 
1127                 &ct_nextleaf, edge[edgeofleaf[0]].downright->numedge);
1128 }
1129
1130
1131 /**********/
1132
1133 /* sort subtree from edge recursively by indices */
1134 int sortOTU(int edge, int *ctree)
1135 {
1136         int key1, key2;
1137         int edge1, edge2;
1138         int tempedge;
1139
1140         if (ctree[ct_2ndedge((int) (edge / 4))] < 0)
1141                 return ctree[ct_sortkeyaddr(edge)];
1142
1143         edge1 = ctree[ct_nextedgeaddr(edge)];
1144         edge2 = ctree[ct_nextedgeaddr(ct_nextedgeaddr(edge))];
1145
1146         /* printf ("visiting [%5d] -> [%5d], [%5d]\n", edge, edge1, edge2); */
1147         /* printf ("visiting [%2d.%2d] -> [%2d.%2d], [%2d.%2d]\n", 
1148            (int)(edge/4), edge%4, (int)(edge1/4), edge1%4, 
1149            (int)(edge2/4), edge2%4); */
1150
1151         key1  = sortOTU(edge1, ctree); 
1152         key2  = sortOTU(edge2, ctree); 
1153         
1154         if (key2 < key1) {
1155                 tempedge            = ctree[ctree[edge1]];
1156                 ctree[ctree[edge1]] = ctree[ctree[edge2]];
1157                 ctree[ctree[edge2]] = tempedge;
1158                 tempedge            = ctree[edge1];
1159                 ctree[edge1]        = ctree[edge2];
1160                 ctree[edge2]        = tempedge;
1161                 ctree[ct_sortkeyaddr(edge)] = key2;
1162                 
1163         } else {
1164           ctree[ct_sortkeyaddr(edge)] = key1;
1165         }
1166         return ctree[ct_sortkeyaddr(edge)];
1167 }
1168
1169
1170 /**********/
1171
1172 /* sort ctree recursively by indices */
1173 int sortctree(int *ctree)
1174 {
1175         int n, startnode=-1;
1176         for(n=0; n<Maxspc; n++) {
1177                 if (ctree[ct_sortkeyaddr(n*4)] == 0)
1178                         startnode = n;
1179         }
1180         sortOTU(ctree[startnode * 4], ctree);
1181         return startnode;
1182 }
1183
1184
1185 /**********/
1186
1187 /* print recursively subtree of edge of sorted tree ctree */
1188 void printfsortOTU(int edge, int *ctree)
1189 {
1190         int edge1, edge2;
1191
1192         if (ctree[ct_2ndedge((int) (edge / 4))] < 0) {
1193                 printf("%d", ctree[ct_sortkeyaddr(edge)]);
1194                 return;
1195         }
1196
1197         edge1 = ctree[ct_nextedgeaddr(edge)];
1198         edge2 = ctree[ct_nextedgeaddr(ct_nextedgeaddr(edge))];
1199
1200         printf("(");
1201         printfsortOTU(edge1, ctree); 
1202         printf(",");
1203         printfsortOTU(edge2, ctree); 
1204         printf(")");
1205
1206 }
1207
1208
1209 /**********/
1210
1211 /* print recursively sorted tree ctree */
1212 int printfsortctree(int *ctree)
1213 {
1214         int n, startnode=-1;
1215         for(n=0; n<Maxspc; n++) {
1216                 if (ctree[ct_sortkeyaddr(n*4)] == 0)
1217                         startnode = n;
1218         }
1219         printf ("(%d,", ctree[ct_sortkeyaddr(startnode*4)]);
1220         printfsortOTU(ctree[startnode * 4], ctree);
1221         printf (");\n");
1222         return startnode;
1223 }
1224
1225
1226 /**********/
1227
1228 /* print recursively subtree of edge of sorted tree ctree to string */
1229 void sprintfOTU(char *str, int *len, int edge, int *ctree)
1230 {
1231         int edge1, edge2;
1232
1233         if (ctree[ct_2ndedge((int) (edge / 4))] < 0) {
1234                 *len+=sprintf(&(str[*len]), "%d", ctree[ct_sortkeyaddr(edge)]);
1235                 return;
1236         }
1237
1238         edge1 = ctree[ct_nextedgeaddr(edge)];
1239         edge2 = ctree[ct_nextedgeaddr(ct_nextedgeaddr(edge))];
1240
1241         sprintf(&(str[*len]), "(");
1242         (*len)++;
1243         sprintfOTU(str, len, edge1, ctree); 
1244         sprintf(&(str[*len]), ",");
1245         (*len)++;
1246         sprintfOTU(str, len, edge2, ctree); 
1247         sprintf(&(str[*len]), ")");
1248         (*len)++;
1249 }
1250
1251 /**********/
1252
1253 /* print recursively sorted tree ctree to string */
1254 char *sprintfctree(int *ctree, int strglen)
1255 {
1256         char *treestr,
1257              *tmpptr;
1258         int n,
1259             len=0,
1260             startnode=-1;
1261         treestr = (char *) malloc(strglen * sizeof(char));
1262         tmpptr  = treestr;
1263         for(n=0; n<Maxspc; n++) {
1264                 if (ctree[ct_sortkeyaddr(n*4)] == 0)
1265                         startnode = n;
1266         }
1267         len+=sprintf (&(tmpptr[len]), "(%d,", ctree[ct_sortkeyaddr(startnode*4)]);
1268         sprintfOTU(tmpptr, &len, ctree[startnode * 4], ctree);
1269         len+=sprintf (&(tmpptr[len]), ");");
1270         return treestr;
1271 }
1272
1273
1274 /**********/
1275
1276
1277 /***********************************************/
1278 /* establish and handle a list of sorted trees */
1279 /***********************************************/
1280
1281 int itemcount;
1282
1283 /* initialize structure */
1284 treelistitemtype *inittreelist(int *treenum)
1285 {
1286         *treenum = 0;
1287         return    NULL;
1288 }
1289
1290
1291 /**********/
1292
1293 /* malloc new tree list item */
1294 treelistitemtype *gettreelistitem()
1295 {
1296         treelistitemtype *tmpptr;
1297         tmpptr = (treelistitemtype *)malloc(sizeof(treelistitemtype));
1298         if (tmpptr == NULL) maerror("item of intermediate tree stuctures");
1299         (*tmpptr).pred = NULL;
1300         (*tmpptr).succ = NULL;
1301         (*tmpptr).tree = NULL;
1302         (*tmpptr).count = 0;
1303         (*tmpptr).idx = itemcount++;
1304         return tmpptr;
1305 }
1306
1307 /**********/
1308
1309 /* free whole tree list */
1310 void freetreelist(treelistitemtype **list,
1311                   int               *numitems,
1312                   int               *numsum)
1313 {
1314         treelistitemtype *current; 
1315         treelistitemtype *next;
1316         current = *list;
1317         while (current != NULL) {
1318                 next = (*current).succ;
1319                 if ((*current).tree != NULL) {
1320                         free ((*current).tree);
1321                         (*current).tree = NULL;
1322                 }
1323                 free(current);
1324                 current = next;
1325         }
1326         *list = NULL;
1327         *numitems = 0;
1328         *numsum = 0;
1329 } /* freetreelist */
1330
1331
1332 /**********/
1333
1334 /* add tree to the tree list */
1335 treelistitemtype *addtree2list(char             **tree,         /* sorted tree string */
1336                                int                numtrees,     /* how many occurred, e.g. in parallel */
1337                                treelistitemtype **list,         /* addr. of tree list */
1338                                int               *numitems,     
1339                                int               *numsum)
1340 {
1341         treelistitemtype *tmpptr = NULL;
1342         treelistitemtype *newptr = NULL;
1343         int               result;
1344         int               done = 0;
1345
1346         if ((*list == NULL) || (numitems == 0)) {
1347                 newptr = gettreelistitem();
1348                 (*newptr).tree = *tree; 
1349                 *tree = NULL;
1350                 (*newptr).id    = *numitems;
1351                 (*newptr).count = numtrees;
1352                 *numitems = 1;
1353                 *numsum   = numtrees;
1354                 *list = newptr;
1355         } else {
1356                 tmpptr = *list;
1357                 while(done == 0) {
1358                         result = strcmp( (*tmpptr).tree, *tree);
1359                         if (result==0) {
1360                                 free(*tree); *tree = NULL;
1361                                 (*tmpptr).count += numtrees;
1362                                 *numsum += numtrees;
1363                                 done = 1;
1364                                 newptr = tmpptr;
1365                         } else { if (result < 0) {
1366                                         if ((*tmpptr).succ != NULL)
1367                                                 tmpptr = (*tmpptr).succ;
1368                                         else {
1369                                                 newptr = gettreelistitem();
1370                                                 (*newptr).tree = *tree; 
1371                                                 *tree = NULL;
1372                                                 (*newptr).id    = *numitems;
1373                                                 (*newptr).count = numtrees;
1374                                                 (*newptr).pred  = tmpptr;
1375                                                 (*tmpptr).succ  = newptr;
1376                                                 (*numitems)++;
1377                                                 *numsum += numtrees;
1378                                                 done = 1;
1379                                         }
1380                         } else { /* result < 0 */
1381                                 newptr = gettreelistitem();
1382                                 (*newptr).tree = *tree; 
1383                                 *tree = NULL;
1384                                 (*newptr).id    = *numitems;
1385                                 (*newptr).count = numtrees;
1386                                 (*newptr).succ  = tmpptr;
1387                                 (*newptr).pred  = (*tmpptr).pred;
1388                                 (*tmpptr).pred  = newptr;
1389                                 *numsum += numtrees;
1390
1391                                 if ((*newptr).pred != NULL) {
1392                                    (*(*newptr).pred).succ = newptr;
1393                                 } else {
1394                                    *list = newptr;
1395                                 }
1396                                 (*numitems)++;
1397                                 done = 1;
1398                         } /* end if result < 0 */
1399                         } /* end if result != 0 */
1400                 } /* while  searching in list */
1401         } /* if list empty, else */
1402         return (newptr);
1403 } /* addtree2list */
1404
1405
1406 /**********/
1407
1408 /* resort list of trees by number of occurences for output */
1409 void sortbynum(treelistitemtype *list, treelistitemtype **sortlist)
1410 {
1411         treelistitemtype *tmpptr = NULL;
1412         treelistitemtype *curr = NULL;
1413         treelistitemtype *next = NULL;
1414         int xchange = 1;
1415
1416         if (list == NULL) fprintf(stderr, "Grrrrrrrrr>>>>\n");
1417         tmpptr = list;
1418         *sortlist = list;
1419         while (tmpptr != NULL) {
1420                 (*tmpptr).sortnext = (*tmpptr).succ;
1421                 (*tmpptr).sortlast = (*tmpptr).pred;
1422                 tmpptr = (*tmpptr).succ;
1423         }
1424
1425         while (xchange > 0) {
1426                 curr = *sortlist;
1427                 xchange = 0;
1428                 if (curr == NULL) fprintf(stderr, "Grrrrrrrrr>>>>\n");
1429                 while((*curr).sortnext != NULL) {
1430                         next = (*curr).sortnext;
1431                         if ((*curr).count >= (*next).count)
1432                                 curr = (*curr).sortnext;
1433                         else {
1434                                 if ((*curr).sortlast != NULL)
1435                                         (*((*curr).sortlast)).sortnext = next;
1436                                 if (*sortlist == curr)
1437                                         *sortlist = next;
1438                                 (*next).sortlast = (*curr).sortlast;
1439
1440                                 if ((*next).sortnext != NULL)
1441                                         (*((*next).sortnext)).sortlast = curr;
1442                                 (*curr).sortnext = (*next).sortnext;
1443
1444                                 (*curr).sortlast = next;
1445                                 (*next).sortnext = curr;
1446
1447                                 xchange++;
1448                         }
1449                 }
1450         }
1451 }  /* sortbynum */
1452
1453
1454 /**********/
1455
1456 /* print puzzling step tree stuctures for checking */
1457 void printfpstrees(treelistitemtype *list)
1458 {
1459         char ch;
1460         treelistitemtype *tmpptr = NULL;
1461         tmpptr = list;
1462         ch = '-';
1463         while (tmpptr != NULL) {
1464                 printf ("%c[%2d]  %5d     %s\n", ch, (*tmpptr).idx, (*tmpptr).count, (*tmpptr).tree);
1465                 tmpptr = (*tmpptr).succ;
1466                 ch = ' ';
1467         }
1468 }
1469
1470 /**********/
1471
1472 /* print sorted puzzling step tree stucture with names */
1473 void fprintffullpstree(FILE *outf, char *treestr)
1474 {
1475         int count = 0;
1476         int idnum = 0;
1477         int n;
1478         for(n=0; treestr[n] != '\0'; n++){
1479                 while(isdigit((int)treestr[n])){
1480                         idnum = (10 * idnum) + ((int)treestr[n]-48);
1481                         n++;
1482                         count++;
1483                 }
1484                 if (count > 0){
1485 #                       ifdef USEQUOTES
1486                                 fprintf(outf, "'");
1487 #                       endif
1488                         (void)fputid(outf, idnum);
1489 #                       ifdef USEQUOTES
1490                                 fprintf(outf, "'");
1491 #                       endif
1492                         count = 0;
1493                         idnum = 0;
1494                 }
1495                 fprintf(outf, "%c", treestr[n]);
1496         }
1497 }
1498
1499
1500 /**********/
1501
1502 /* print sorted puzzling step tree stuctures with names */
1503 void fprintfsortedpstrees(FILE *output, 
1504                           treelistitemtype *list,  /* tree list */
1505                           int itemnum,             /* order number */
1506                           int itemsum,             /* number of trees */
1507                           int comment,             /* with statistics, or puzzle report ? */
1508                           float cutoff)            /* cutoff percentage */
1509 {
1510         treelistitemtype *tmpptr = NULL;
1511         treelistitemtype *slist = NULL;
1512         int num = 1;
1513         float percent;
1514
1515         if (list == NULL) fprintf(stderr, "Grrrrrrrrr>>>>\n");
1516         sortbynum(list, &slist); 
1517
1518         tmpptr = slist;
1519         while (tmpptr != NULL) {
1520                 percent = (float)(100.0 * (*tmpptr).count / itemsum);
1521                 if ((cutoff == 0.0) || (cutoff <= percent)) {
1522                         if (comment)
1523                                 fprintf (output, "[ %d. %d %.2f %d %d %d ]", num++, (*tmpptr).count, percent, (*tmpptr).id, itemnum, itemsum);
1524                         else {
1525                                 if (num == 1){
1526                                         fprintf (output, "\n");
1527                                         fprintf (output, "The following tree(s) occured in more than %.2f%% of the %d puzzling steps.\n", cutoff, itemsum);
1528                                         fprintf (output, "The trees are orderd descending by the number of occurences.\n");
1529                                         fprintf (output, "\n");
1530                                         fprintf (output, "\n       occurences    ID  Phylip tree\n");
1531                                 }
1532                                 fprintf (output, "%2d. %5d %6.2f%% %5d  ", num++, (*tmpptr).count, percent, (*tmpptr).id);
1533                         }
1534                         fprintffullpstree(output, (*tmpptr).tree);
1535                         fprintf (output, "\n");
1536                 }
1537                 tmpptr = (*tmpptr).sortnext;
1538         }
1539
1540         if (!comment) {
1541                 fprintf (output, "\n");
1542                 switch(num) {
1543                         case 1: fprintf (output, "There were no tree topologies (out of %d) occuring with a percentage >= %.2f%% of the %d puzzling steps.\n", itemnum, cutoff, itemsum); break;
1544                         case 2: fprintf (output, "There was one tree topology (out of %d) occuring with a percentage >= %.2f%%.\n", itemnum, cutoff); break;
1545                         default: fprintf (output, "There were %d tree topologies (out of %d) occuring with a percentage >= %.2f%%.\n", num-1, itemnum, cutoff); break;
1546                 }
1547                 fprintf (output, "\n");
1548                 fprintf (output, "\n");
1549         }
1550         
1551 }  /* fprintfsortedpstrees */
1552
1553 /**********/
1554
1555 /* print sorted tree topologies for checking */
1556 void printfsortedpstrees(treelistitemtype *list)
1557 {
1558         treelistitemtype *tmpptr = NULL;
1559         treelistitemtype *slist = NULL;
1560
1561         sortbynum(list, &slist); 
1562
1563         tmpptr = slist;
1564         while (tmpptr != NULL) {
1565                 printf ("[%2d]  %5d     %s\n", (*tmpptr).idx, (*tmpptr).count, (*tmpptr).tree);
1566                 tmpptr = (*tmpptr).sortnext;
1567         }
1568 }  /* printfsortedpstrees */
1569
1570
1571 /*******************************************/
1572 /* end of tree sorting                     */
1573 /*******************************************/
1574
1575
1576
1577 /******************************************************************************/
1578 /* functions for computing the consensus tree                                 */
1579 /******************************************************************************/
1580
1581 /* prepare for consensus tree analysis */
1582 void initconsensus()
1583 {
1584 #       if ! PARALLEL
1585                 biparts = new_cmatrix(Maxspc-3, Maxspc);
1586 #       endif /* PARALLEL */
1587
1588         if (Maxspc % 32 == 0)
1589                 splitlength = Maxspc/32;
1590         else splitlength = (Maxspc + 32 - (Maxspc % 32))/32;
1591         numbiparts = 0; /* no pattern stored so far */
1592         maxbiparts = 0; /* no memory reserved so far */
1593         splitfreqs = NULL;
1594         splitpatterns = NULL;
1595         splitsizes = NULL;
1596         splitcomp = (uli *) malloc(splitlength * sizeof(uli) );
1597         if (splitcomp == NULL) maerror("splitcomp in initconsensus");
1598 }
1599
1600 /* prototype needed for recursive function */
1601 void makepart(int i, int curribrnch);
1602
1603 /* recursive function to get bipartitions */
1604 void makepart(int i, int curribrnch)
1605 {       
1606         int j;
1607         
1608         if ( edge[i].downright == NULL ||
1609                   edge[i].downleft == NULL) { /* if i is leaf */
1610                         
1611                         /* check out what leaf j sits on this edge i */         
1612                         for (j = 1; j < Maxspc; j++) {
1613                                 if (edgeofleaf[j] == i) {
1614                                         biparts[curribrnch][trueID[j]] = '*';   
1615                                         return;
1616                                 }       
1617                         }
1618         } else { /* still on inner branch */
1619                 makepart(edge[i].downleft->numedge, curribrnch);
1620                 makepart(edge[i].downright->numedge, curribrnch);
1621         }
1622 }
1623
1624 /* compute bipartitions of tree of current puzzling step */
1625 void computebiparts()
1626 {
1627         int i, j, curribrnch;
1628         
1629         curribrnch = -1;
1630         
1631         for (i = 0; i < Maxspc - 3; i++)
1632                 for (j = 0; j < Maxspc; j++)
1633                         biparts[i][j] = '.';
1634
1635         for (i = 0; i < Maxbrnch; i++) {
1636                 if (!(     edgeofleaf[0] == i    ||
1637                        edge[i].downright == NULL ||
1638                         edge[i].downleft == NULL) ) { /* check all inner branches */
1639                         curribrnch++;
1640                         makepart(i, curribrnch);
1641                         
1642                         /* make sure that the root is always a '*' */
1643                         if (biparts[curribrnch][outgroup] == '.') {
1644                                 for (j = 0; j < Maxspc; j++) {
1645                                         if (biparts[curribrnch][j] == '.')
1646                                                 biparts[curribrnch][j] = '*';
1647                                         else
1648                                                 biparts[curribrnch][j] = '.';
1649                                 }
1650                         }
1651                 }
1652         }
1653 }
1654
1655 /* print out the bipartition n of all different splitpatterns */
1656 void printsplit(FILE *fp, uli n)
1657 {
1658         int i, j, col;
1659         uli z;
1660         
1661         col = 0;
1662         for (i = 0; i < splitlength; i++) {
1663                 z = splitpatterns[n*splitlength + i];
1664                 for (j = 0; j < 32 && col < Maxspc; j++) {
1665                         if (col % 10 == 0 && col != 0) fprintf(fp, " ");
1666                         if (z & 1) fprintf(fp, ".");
1667                         else fprintf(fp, "*");
1668                         z = (z >> 1);
1669                         col++;
1670                 }
1671         }
1672 }
1673
1674 /* make new entries for new different bipartitions and count frequencies */
1675 void makenewsplitentries()
1676 {
1677         int i, j, bpc, identical, idflag, bpsize;
1678         uli nextentry, obpc;
1679
1680         /* where the next entry would be in splitpatterns */
1681         nextentry = numbiparts;
1682         
1683         for (bpc = 0; bpc < Maxspc - 3; bpc++) { /* for every new bipartition */        
1684                 /* convert bipartition into a more compact format */
1685                 bpsize = 0;
1686                 for (i = 0; i < splitlength; i++) {
1687                         splitcomp[i] = 0;       
1688                         for (j = 0; j < 32; j++) {
1689                                 splitcomp[i] = splitcomp[i] >> 1;
1690                                 if (i*32 + j < Maxspc)
1691                                         if (biparts[bpc][i*32 + j] == '.') {
1692                                                 /* set highest bit */
1693                                                 splitcomp[i] = (splitcomp[i] | 2147483648UL);
1694                                                 bpsize++; /* count the '.' */
1695                                         }
1696                         }
1697                 }               
1698                 /* compare to the *old* patterns */
1699                 identical = FALSE;
1700                 for (obpc = 0; (obpc < numbiparts) && (!identical); obpc++) {
1701                         /* compare first partition size */
1702                         if (splitsizes[obpc] == bpsize) idflag = TRUE;
1703                         else idflag = FALSE;
1704                         /* if size is identical compare whole partition */
1705                         for (i = 0; (i < splitlength) && idflag; i++)
1706                                 if (splitcomp[i] != splitpatterns[obpc*splitlength + i])
1707                                         idflag = FALSE;
1708                         if (idflag) identical = TRUE;
1709                 }
1710                 if (identical) { /* if identical increase frequency */
1711                         splitfreqs[2*(obpc-1)]++;
1712                 } else { /* create new entry */
1713                         if (nextentry == maxbiparts) { /* reserve more memory */
1714                                 maxbiparts = maxbiparts + 2*Maxspc;
1715                                 splitfreqs = (uli *) myrealloc(splitfreqs,
1716                                         2*maxbiparts * sizeof(uli) );
1717                                 /* 2x: splitfreqs contains also an index (sorting!) */
1718                                 if (splitfreqs == NULL) maerror("splitfreqs in makenewsplitentries");
1719                                 splitpatterns = (uli *) myrealloc(splitpatterns,
1720                                         splitlength*maxbiparts * sizeof(uli) );
1721                                 if (splitpatterns == NULL) maerror("splitpatterns in makenewsplitentries");
1722                                 splitsizes = (int *) myrealloc(splitsizes,
1723                                         maxbiparts * sizeof(int) );
1724                                 if (splitsizes == NULL) maerror("splitsizes in makenewsplitentries");
1725                         }
1726                         splitfreqs[2*nextentry] = 1; /* frequency */
1727                         splitfreqs[2*nextentry+1] = nextentry; /* index for sorting */
1728                         for (i = 0; i < splitlength; i++)
1729                                 splitpatterns[nextentry*splitlength + i] = splitcomp[i];
1730                         splitsizes[nextentry] = bpsize;
1731                         nextentry++;
1732                 }
1733         }
1734         numbiparts = nextentry;
1735 }
1736
1737 /* general remarks:
1738
1739    - every entry in consbiparts is one node of the consensus tree
1740    - for each node one has to know which taxa and which other nodes
1741      are *directly* descending from it
1742    - for every taxon/node number there is a flag that shows
1743      whether it descends from the node or not
1744    - '0' means that neither a taxon nor another node with the
1745          corresponding number decends from the node
1746      '1' means that the corresponding taxon descends from the node
1747      '2' means that the corresponding node descends from the node
1748      '3' means that the corresponding taxon and node descends from the node
1749 */
1750
1751 /* copy bipartition n of all different splitpatterns to consbiparts[k] */
1752 void copysplit(uli n, int k)
1753 {
1754         int i, j, col;
1755         uli z;
1756         
1757         col = 0;
1758         for (i = 0; i < splitlength; i++) {
1759                 z = splitpatterns[n*splitlength + i];
1760                 for (j = 0; j < 32 && col < Maxspc; j++) {
1761                         if (z & 1) consbiparts[k][col] = '1';
1762                         else consbiparts[k][col] = '0';
1763                         z = (z >> 1);
1764                         col++;
1765                 }
1766         }
1767 }
1768
1769 /* compute majority rule consensus tree */
1770 void makeconsensus()
1771 {
1772         int i, j, k, size, subnode;
1773         char chari, charj;
1774
1775         /* sort bipartition frequencies */
1776         qsort(splitfreqs, numbiparts, 2*sizeof(uli), ulicmp);
1777         /* how many bipartitions are included in the consensus tree */
1778         consincluded = 0;
1779         for (i = 0; i < numbiparts && i == consincluded; i++) {
1780                 if (2*splitfreqs[2*i] > Numtrial) consincluded = i + 1;
1781         }
1782
1783         /* collect all info about majority rule consensus tree */
1784         /* the +1 is due to the edge with the root */
1785         consconfid = new_ivector(consincluded + 1);
1786         conssizes = new_ivector(2*consincluded + 2);
1787         consbiparts = new_cmatrix(consincluded + 1, Maxspc);
1788         
1789         for (i = 0; i < consincluded; i++) {
1790                 /* copy partition to consbiparts */
1791                 copysplit(splitfreqs[2*i+1], i);
1792                 /* frequency in percent (rounded to integer) */
1793                 consconfid[i] = (int) floor(100.0*splitfreqs[2*i]/Numtrial + 0.5);
1794                 /* size of partition */
1795                 conssizes[2*i] = splitsizes[splitfreqs[2*i+1]];
1796                 conssizes[2*i+1] = i;
1797         }
1798         for (i = 0; i < Maxspc; i++) consbiparts[consincluded][i] = '1';
1799         consbiparts[consincluded][outgroup] = '0';
1800         consconfid[consincluded] = 100;
1801         conssizes[2*consincluded] = Maxspc - 1;
1802         conssizes[2*consincluded + 1] = consincluded;
1803
1804         /* sort bipartitions according to cluster size */
1805         qsort(conssizes, consincluded + 1, 2*sizeof(int), intcmp);
1806
1807         /* reconstruct consensus tree */
1808         for (i = 0; i < consincluded; i++) { /* try every node */
1809                 size = conssizes[2*i]; /* size of current node */
1810                 for (j = i + 1; j < consincluded + 1; j++) {
1811                 
1812                         /* compare only with nodes with more descendants */
1813                         if (size == conssizes[2*j]) continue;
1814                         
1815                         /* check whether node i is a subnode of j */
1816                         subnode = FALSE;
1817                         for (k = 0; k < Maxspc && !subnode; k++) {
1818                                 chari = consbiparts[ conssizes[2*i+1] ][k];
1819                                 if (chari != '0') {
1820                                         charj = consbiparts[ conssizes[2*j+1] ][k];
1821                                         if (chari == charj || charj == '3') subnode = TRUE;
1822                                 }
1823                         }
1824                         
1825                         /* if i is a subnode of j change j accordingly */
1826                         if (subnode) {
1827                                 /* remove subnode i from j */
1828                                 for (k = 0; k < Maxspc; k++) {
1829                                         chari = consbiparts[ conssizes[2*i+1] ][k];
1830                                         if (chari != '0') {
1831                                                 charj = consbiparts[ conssizes[2*j+1] ][k];
1832                                                 if (chari == charj)
1833                                                         consbiparts[ conssizes[2*j+1] ][k] = '0';
1834                                                 else if (charj == '3') {
1835                                                         if (chari == '1')
1836                                                                 consbiparts[ conssizes[2*j+1] ][k] = '2';
1837                                                         else if (chari == '2')
1838                                                                 consbiparts[ conssizes[2*j+1] ][k] = '1';
1839                                                         else {
1840                                                                 /* Consensus tree [1] */
1841                                                                 FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR H TO DEVELOPERS\n\n\n");
1842                                                                 exit(1);
1843                                                         }       
1844                                                 } else {
1845                                                         /* Consensus tree [2] */
1846                                                         FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR I TO DEVELOPERS\n\n\n");
1847                                                         exit(1);
1848                                                 }
1849                                         }
1850                                 }
1851                                 /* add link to subnode i in node j */
1852                                 charj = consbiparts[ conssizes[2*j+1] ][ conssizes[2*i+1] ];
1853                                 if (charj == '0')
1854                                         consbiparts[ conssizes[2*j+1] ][ conssizes[2*i+1] ] = '2';
1855                                 else if (charj == '1')
1856                                         consbiparts[ conssizes[2*j+1] ][ conssizes[2*i+1] ] = '3';
1857                                 else {
1858                                         /* Consensus tree [3] */
1859                                         FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR J TO DEVELOPERS\n\n\n");
1860                                         exit(1);
1861                                 }
1862                         }
1863                 }
1864         }
1865 }
1866
1867 /* prototype for recursion */
1868 void writenode(FILE *treefile, int node);
1869
1870 /* write node (writeconsensustree) */
1871 void writenode(FILE *treefile, int node)
1872 {
1873         int i, first;
1874         
1875         fprintf(treefile, "(");
1876         column++;
1877         /* write descending nodes */
1878         first = TRUE;
1879         for (i = 0; i < Maxspc; i++) {
1880                 if (consbiparts[node][i] == '2' ||
1881                 consbiparts[node][i] == '3') {
1882                         if (first) first = FALSE;
1883                         else {
1884                                 fprintf(treefile, ",");
1885                                 column++;
1886                         }
1887                         if (column > 60) {
1888                                 column = 2;
1889                                 fprintf(treefile, "\n");
1890                         }
1891                         /* write node i */
1892                         writenode(treefile, i);
1893
1894                         /* reliability value as internal label */
1895                         fprintf(treefile, "%d", consconfid[i]);
1896
1897                         column = column + 3;
1898                 }
1899         }
1900         /* write descending taxa */
1901         for (i = 0; i < Maxspc; i++) {
1902                 if (consbiparts[node][i] == '1' ||
1903                 consbiparts[node][i] == '3') {
1904                         if (first) first = FALSE;
1905                         else {
1906                                 fprintf(treefile, ",");
1907                                 column++;
1908                         }
1909                         if (column > 60) {
1910                                 column = 2;
1911                                 fprintf(treefile, "\n");
1912                         }
1913                         column += fputid(treefile, i);
1914                 }
1915         }
1916         fprintf(treefile, ")");
1917         column++;
1918 }
1919
1920 /* write consensus tree */
1921 void writeconsensustree(FILE *treefile)
1922 {
1923         int i, first;
1924         
1925         column = 1;
1926         fprintf(treefile, "(");
1927         column += fputid(treefile, outgroup) + 2;
1928         fprintf(treefile, ",");
1929         /* write descending nodes */
1930         first = TRUE;
1931         for (i = 0; i < Maxspc; i++) {
1932                 if (consbiparts[consincluded][i] == '2' ||
1933                 consbiparts[consincluded][i] == '3') {
1934                         if (first) first = FALSE;
1935                         else {
1936                                 fprintf(treefile, ",");
1937                                 column++;
1938                         }
1939                         if (column > 60) {
1940                                 column = 2;
1941                                 fprintf(treefile, "\n");
1942                         }
1943                         /* write node i */
1944                         writenode(treefile, i);
1945
1946                         /* reliability value as internal label */
1947                         fprintf(treefile, "%d", consconfid[i]);
1948
1949                         column = column + 3;
1950                 }
1951         }
1952         /* write descending taxa */
1953         for (i = 0; i < Maxspc; i++) {
1954                 if (consbiparts[consincluded][i] == '1' ||
1955                 consbiparts[consincluded][i] == '3') {
1956                         if (first) first = FALSE;
1957                         else {
1958                                 fprintf(treefile, ",");
1959                                 column++;
1960                         }
1961                         if (column > 60) {
1962                                 column = 2;
1963                                 fprintf(treefile, "\n");
1964                         }
1965                         column += fputid(treefile, i);
1966                 }
1967         }
1968         fprintf(treefile, ");\n");
1969 }
1970
1971 /* prototype for recursion */
1972 void nodecoordinates(int node);
1973
1974 /* establish node coordinates (plotconsensustree) */
1975 void nodecoordinates(int node)
1976 {
1977         int i, ymin, ymax, xcoordinate;
1978
1979         /* first establish coordinates of descending nodes */
1980         for (i = 0; i < Maxspc; i++) {
1981                 if (consbiparts[node][i] == '2' ||
1982                 consbiparts[node][i] == '3') 
1983                         nodecoordinates(i);
1984         }
1985         
1986         /* then establish coordinates of descending taxa */
1987         for (i = 0; i < Maxspc; i++) {
1988                 if (consbiparts[node][i] == '1' ||
1989                 consbiparts[node][i] == '3') {
1990                         /* y-coordinate of taxon i */
1991                         ycortax[i] = ytaxcounter;
1992                         ytaxcounter = ytaxcounter - 2;
1993                 }
1994         }
1995         
1996         /* then establish coordinates of this node */
1997         ymin = 2*Maxspc - 2;
1998         ymax = 0;
1999         xcoordinate = 0;
2000         for (i = 0; i < Maxspc; i++) {
2001                 if (consbiparts[node][i] == '2' ||
2002                 consbiparts[node][i] == '3') {
2003                         if (ycor[i] > ymax) ymax = ycor[i];
2004                         if (ycor[i] < ymin) ymin = ycor[i];
2005                         if (xcor[i] > xcoordinate) xcoordinate = xcor[i];
2006                 }
2007         }
2008         for (i = 0; i < Maxspc; i++) {
2009                 if (consbiparts[node][i] == '1' ||
2010                 consbiparts[node][i] == '3') {
2011                         if (ycortax[i] > ymax) ymax = ycortax[i];
2012                         if (ycortax[i] < ymin) ymin = ycortax[i];
2013                 }
2014         }
2015         ycormax[node] = ymax;
2016         ycormin[node] = ymin;
2017         ycor[node] = (int) floor(0.5*(ymax + ymin) + 0.5);
2018         if (xcoordinate == 0) xcoordinate = 9;
2019         xcor[node] = xcoordinate + 4;
2020 }
2021
2022 /* prototype for recursion */
2023 void drawnode(int node, int xold);
2024
2025 /* drawnode  (plotconsensustree) */
2026 void drawnode(int node, int xold)
2027 {
2028         int i, j;
2029         char buf[4];
2030         
2031         /* first draw vertical line */
2032         for (i = ycormin[node] + 1; i < ycormax[node]; i++)
2033                 treepict[xcor[node]][i] = ':';
2034                 
2035         /* then draw descending nodes */
2036         for (i = 0; i < Maxspc; i++) {
2037                 if (consbiparts[node][i] == '2' ||
2038                 consbiparts[node][i] == '3') 
2039                         drawnode(i, xcor[node]);
2040         }
2041         
2042         /* then draw descending taxa */
2043         for (i = 0; i < Maxspc; i++) {
2044                 if (consbiparts[node][i] == '1' ||
2045                 consbiparts[node][i] == '3') {
2046                         treepict[xcor[node]][ycortax[i]] = ':';
2047                         for (j = xcor[node] + 1; j < xsize-10; j++)
2048                                 treepict[j][ycortax[i]] = '-';
2049                         for (j = 0; j < 10; j++)
2050                                 treepict[xsize-10+j][ycortax[i]] = Identif[i][j];       
2051                 }
2052         }
2053         
2054         /* then draw internal edge with consensus value */
2055         treepict[xold][ycor[node]] = ':';
2056         treepict[xcor[node]][ycor[node]] = ':';
2057         for (i = xold + 1; i < xcor[node]-3; i++)
2058                 treepict[i][ycor[node]] = '-';
2059         sprintf(buf, "%d", consconfid[node]);
2060         if (consconfid[node] == 100) {
2061                 treepict[xcor[node]-3][ycor[node]] = buf[0];
2062                 treepict[xcor[node]-2][ycor[node]] = buf[1];
2063                 treepict[xcor[node]-1][ycor[node]] = buf[2];    
2064         } else {
2065                 treepict[xcor[node]-3][ycor[node]] = '-';
2066                 treepict[xcor[node]-2][ycor[node]] = buf[0];
2067                 treepict[xcor[node]-1][ycor[node]] = buf[1];
2068         }
2069 }
2070
2071 /* plot consensus tree */
2072 void plotconsensustree(FILE *plotfp)
2073 {
2074         int i, j, yroot, startree;
2075
2076         /* star tree or no star tree */
2077         if (consincluded == 0) {
2078                 startree = TRUE;
2079                 consincluded = 1; /* avoids problems with malloc */
2080         } else
2081                 startree = FALSE;
2082         
2083         /* memory for x-y-coordinates of each bipartition */
2084         xcor = new_ivector(consincluded);
2085         ycor = new_ivector(consincluded);
2086         ycormax = new_ivector(consincluded);
2087         ycormin = new_ivector(consincluded);
2088         if (startree) consincluded = 0; /* avoids problems with malloc */
2089
2090         /* y-coordinates of each taxon */
2091         ycortax = new_ivector(Maxspc);
2092         ycortax[outgroup] = 0;
2093         
2094         /* establish coordinates */
2095         ytaxcounter = 2*Maxspc - 2;
2096         
2097         /* first establish coordinates of descending nodes */
2098         for (i = 0; i < Maxspc; i++) {
2099                 if (consbiparts[consincluded][i] == '2' ||
2100                 consbiparts[consincluded][i] == '3') 
2101                         nodecoordinates(i);
2102         }
2103         
2104         /* then establish coordinates of descending taxa */
2105         for (i = 0; i < Maxspc; i++) {
2106                 if (consbiparts[consincluded][i] == '1' ||
2107                 consbiparts[consincluded][i] == '3') {
2108                         /* y-coordinate of taxon i */
2109                         ycortax[i] = ytaxcounter;
2110                         ytaxcounter = ytaxcounter - 2;
2111                 }
2112         }
2113
2114         /* then establish length of root edge and size of whole tree */
2115         yroot = 0;
2116         xsize = 0;
2117         for (i = 0; i < Maxspc; i++) {
2118                 if (consbiparts[consincluded][i] == '2' ||
2119                 consbiparts[consincluded][i] == '3') {
2120                         if (ycor[i] > yroot) yroot = ycor[i];
2121                         if (xcor[i] > xsize) xsize = xcor[i];
2122                 }
2123         }
2124         for (i = 0; i < Maxspc; i++) {
2125                 if (consbiparts[consincluded][i] == '1' ||
2126                 consbiparts[consincluded][i] == '3') {
2127                         if (ycortax[i] > yroot) yroot = ycortax[i];
2128                 }
2129         }
2130         if (xsize == 0) xsize = 9;
2131         /* size in x direction inclusive one blank on the left */
2132         xsize = xsize + 6; 
2133         
2134         /* change all x-labels so that (0,0) is down-left */
2135         for (i = 0; i < consincluded; i++)
2136                 xcor[i] = xsize-1-xcor[i];
2137         
2138         /* draw tree */
2139         treepict = new_cmatrix(xsize, 2*Maxspc-1);
2140         for (i = 0; i < xsize; i++)
2141                 for (j = 0; j < 2*Maxspc-1; j++)
2142                         treepict[i][j] = ' ';
2143         
2144         /* draw root */
2145         for (i = 1; i < yroot; i++)
2146                 treepict[1][i] = ':';
2147         treepict[1][0] = ':';
2148         for (i = 2; i < xsize - 10; i++)
2149                 treepict[i][0] = '-';
2150         for (i = 0; i < 10; i++)
2151                 treepict[xsize-10+i][0] = Identif[outgroup][i];
2152         
2153         /* then draw descending nodes */
2154         for (i = 0; i < Maxspc; i++) {
2155                 if (consbiparts[consincluded][i] == '2' ||
2156                 consbiparts[consincluded][i] == '3') 
2157                         drawnode(i, 1);
2158         }
2159         
2160         /* then draw descending taxa */
2161         for (i = 0; i < Maxspc; i++) {
2162                 if (consbiparts[consincluded][i] == '1' ||
2163                 consbiparts[consincluded][i] == '3') {
2164                         treepict[1][ycortax[i]] = ':';
2165                         for (j = 2; j < xsize-10; j++)
2166                                 treepict[j][ycortax[i]] = '-';
2167                         for (j = 0; j < 10; j++)
2168                                 treepict[xsize-10+j][ycortax[i]] = Identif[i][j];       
2169                 }
2170         }
2171         
2172         /* plot tree */
2173         for (i = 2*Maxspc-2; i > -1; i--) {
2174                 for (j = 0; j < xsize; j++)
2175                         fputc(treepict[j][i], plotfp);
2176                 fputc('\n', plotfp);
2177         }       
2178         
2179         free_ivector(xcor);
2180         free_ivector(ycor);
2181         free_ivector(ycormax);
2182         free_ivector(ycormin);
2183         free_ivector(ycortax);
2184         free_cmatrix(treepict);
2185 }
2186
2187
2188
2189 /******************************************************************************/
2190 /* storing and evaluating quartet branching information                       */
2191 /******************************************************************************/
2192
2193 /* general remarks:
2194
2195         for a quartet with the taxa a, b, c, d there are
2196         three possible binary trees:
2197         
2198                 1)  (a,b)-(c,d)
2199                 2)  (a,c)-(b,d)
2200                 3)  (a,d)-(b,c)
2201         
2202         For every quartet information about its branching structure is
2203         stored. With the functions  readquartet  and  writequartet
2204         this information can be accessed. For every quartet (a,b,c,d)
2205         with a < b < c < d (taxa) the branching information is encoded
2206         using 4 bits:
2207         
2208         value          8             4             2             1
2209                 +-------------+-------------+-------------+-------------+
2210                 |  not used   |   tree 3    |   tree 2    |   tree 1    |
2211                 +-------------+-------------+-------------+-------------+
2212
2213         If the branching structure of the taxa corresponds to one of the
2214         three trees the corresponding bit is set. If the branching structure
2215         is unclear because two of the three trees have the same maximum
2216         likelihood value the corresponding two bits are set. If the branching
2217         structure is completely unknown all the bits are set (the highest
2218         bit is always cleared because it is not used).
2219
2220 */
2221
2222 /* allocate memory for quartets */
2223 unsigned char *mallocquartets(int taxa)
2224 {
2225         uli nc, numch;
2226         unsigned char *qinfo;
2227         
2228         /* compute number of quartets */
2229         Numquartets = (uli) taxa*(taxa-1)*(taxa-2)*(taxa-3)/24;
2230         if (Numquartets % 2 == 0) { /* even number */
2231                 numch = Numquartets/2;
2232         } else { /* odd number */
2233                 numch = (Numquartets + 1)/2;
2234         }
2235         /* allocate memory */
2236         qinfo = (unsigned char *) malloc(numch * sizeof(unsigned char) );
2237         if (qinfo == NULL) maerror("quartetinfo in mallocquartets");
2238         for (nc = 0; nc < numch; nc++) qinfo[nc] = 0;
2239         return(qinfo);
2240 }
2241
2242 /* free quartet memory */
2243 void freequartets()
2244 {       
2245         free(quartetinfo);
2246 }
2247
2248 /* read quartet info - a < b < c < d */
2249 unsigned char readquartet(int a, int b, int c, int d)
2250 {
2251         uli qnum;
2252
2253         qnum = (uli) a
2254                         + (uli) b*(b-1)/2
2255                         + (uli) c*(c-1)*(c-2)/6
2256                         + (uli) d*(d-1)*(d-2)*(d-3)/24;
2257         if (qnum % 2 == 0) { /* even number */
2258                 /* bits 0 to 3 */
2259                 return (quartetinfo[qnum/2] & (unsigned char) 15);
2260         } else { /* odd number */
2261                 /* bits 4 to 7 */
2262                 return ((quartetinfo[(qnum-1)/2] & (unsigned char) 240)>>4);
2263         }
2264 }
2265
2266 /* write quartet info - a < b < c < d, 0 <= info <= 15 */
2267 void writequartet(int a, int b, int c, int d, unsigned char info)
2268 {
2269         uli qnum;
2270
2271         qnum = (uli) a
2272                         + (uli) b*(b-1)/2
2273                         + (uli) c*(c-1)*(c-2)/6
2274                         + (uli) d*(d-1)*(d-2)*(d-3)/24;
2275         if (qnum % 2 == 0) { /* even number */
2276                 /* bits 0 to 3 */
2277                 quartetinfo[qnum/2] =
2278                         ((quartetinfo[qnum/2] & (unsigned char) 240) |
2279                         (info & (unsigned char) 15));
2280         } else { /* odd number */
2281                 /* bits 4 to 7 */
2282                 quartetinfo[(qnum-1)/2] =
2283                         ((quartetinfo[(qnum-1)/2] & (unsigned char) 15) |
2284                         ((info & (unsigned char) 15)<<4));
2285         }
2286 }
2287
2288 /* prototypes */
2289 void openfiletowrite(FILE **, char[], char[]);
2290 void closefile(FILE *);
2291
2292 /* sorts three doubles in descending order */
2293 void sort3doubles(dvector num, ivector order)
2294 {
2295         if (num[0] > num[1]) {
2296                 if(num[2] > num[0]) {
2297                         order[0] = 2;
2298                         order[1] = 0;
2299                         order[2] = 1;           
2300                 } else if (num[2] < num[1]) {
2301                         order[0] = 0;
2302                         order[1] = 1;
2303                         order[2] = 2;           
2304                 } else {
2305                         order[0] = 0;
2306                         order[1] = 2;
2307                         order[2] = 1;           
2308                 }
2309         } else {
2310                 if(num[2] > num[1]) {
2311                         order[0] = 2;
2312                         order[1] = 1;
2313                         order[2] = 0;           
2314                 } else if (num[2] < num[0]) {
2315                         order[0] = 1;
2316                         order[1] = 0;
2317                         order[2] = 2;           
2318                 } else {
2319                         order[0] = 1;
2320                         order[1] = 2;
2321                         order[2] = 0;           
2322                 }
2323         }
2324 }
2325
2326 /* checks out all possible quartets */
2327 void computeallquartets()
2328 {       
2329         double onethird;
2330         uli nq;
2331         unsigned char treebits[3];
2332         FILE *lhfp;
2333 #       if ! PARALLEL
2334                 int a, b, c, i;
2335                 double qc2, mintogo, minutes, hours, temp;
2336                 double temp1, temp2, temp3;
2337                 unsigned char discreteweight[3];
2338 #       endif
2339         
2340         onethird = 1.0/3.0;
2341         treebits[0] = (unsigned char) 1;
2342         treebits[1] = (unsigned char) 2;
2343         treebits[2] = (unsigned char) 4;
2344         
2345         if (show_optn) { /* list all unresolved quartets */
2346                 openfiletowrite(&unresfp, UNRESOLVED, "unresolved quartet trees");
2347                 fprintf(unresfp, "List of all completely unresolved quartets:\n\n");
2348         }
2349
2350         nq = 0;
2351         badqs = 0;
2352         
2353         /* start timer - percentage of completed quartets */
2354         time(&time0);
2355         time1 = time0;
2356         mflag = 0;
2357         
2358 #       if PARALLEL
2359         {
2360            schedtype sched; 
2361            int flag;
2362            MPI_Status stat;
2363            int dest = 1;
2364            uli qaddr  =0;
2365            uli qamount=0;
2366            int qblocksent = 0;
2367            int apr;
2368            uli sq, noq;
2369            initsched(&sched, numquarts(Maxspc), PP_NumProcs-1, 4);
2370            qamount=sgss(&sched);
2371            while (qamount > 0) {
2372               if (PP_emptyslave()) {
2373                  PP_RecvQuartBlock(0, &sq, &noq, quartetinfo, &apr);
2374                  qblocksent -= noq;
2375               }
2376               dest = PP_getslave();
2377               PP_SendDoQuartBlock(dest, qaddr, qamount, (approxqp ? APPROX : EXACT));
2378               qblocksent += qamount;
2379               qaddr += qamount;
2380               qamount=sgss(&sched);
2381                  
2382               MPI_Iprobe(MPI_ANY_SOURCE, PP_QUARTBLOCKSPECS, PP_Comm, &flag, &stat);
2383               while (flag) {
2384                  PP_RecvQuartBlock(0, &sq, &noq, quartetinfo, &apr);
2385                  qblocksent -= noq;
2386                  MPI_Iprobe(MPI_ANY_SOURCE, PP_QUARTBLOCKSPECS, PP_Comm, &flag, &stat);
2387               }
2388            }
2389            while (qblocksent > 0) {
2390               PP_RecvQuartBlock(0, &sq, &noq, quartetinfo, &apr);
2391               qblocksent -= noq;
2392            }
2393         }
2394 #       else /* PARALLEL */
2395
2396         addtimes(GENERAL, &tarr);
2397         if (savequartlh_optn) {
2398                 openfiletowrite(&lhfp, ALLQUARTLH, "all quartet likelihoods");
2399                 if (saveqlhbin_optn) writetpqfheader(Maxspc, lhfp, 3);
2400                 else                 writetpqfheader(Maxspc, lhfp, 4); 
2401         }
2402
2403         for (i = 3; i < Maxspc; i++) 
2404                 for (c = 2; c < i; c++) 
2405                         for (b = 1; b < c; b++)
2406                                 for (a = 0; a < b; a++) {
2407                                                         nq++;
2408
2409                                                         /* generate message every 15 minutes */
2410                                                         /* check timer */
2411                                                         time(&time2);
2412                                                         if ( (time2 - time1) > 900) {
2413                                                                 /* every 900 seconds */
2414                                                                 /* percentage of completed quartets */
2415                                                                 if (mflag == 0) {
2416                                                                         FPRINTF(STDOUTFILE "\n");
2417                                                                         mflag = 1;
2418                                                                 }
2419                                                                 qc2 = 100.*nq/Numquartets;
2420                                                                 mintogo = (100.0-qc2) *
2421                                                                         (double) (time2-time0)/60.0/qc2;
2422                                                                 hours = floor(mintogo/60.0);
2423                                                                 minutes = mintogo - 60.0*hours;
2424                                                                 FPRINTF(STDOUTFILE "%.2f%%", qc2);
2425                                                                 FPRINTF(STDOUTFILE " completed (remaining");
2426                                                                 FPRINTF(STDOUTFILE " time: %.0f", hours);
2427                                                                 FPRINTF(STDOUTFILE " hours %.0f", minutes);
2428                                                                 FPRINTF(STDOUTFILE " minutes)\n");
2429                                                                 fflush(STDOUT);
2430                                                                 time1 = time2;
2431                                                         }
2432                                                         
2433                                                         /* maximum likelihood values */
2434                                                            
2435                                                         /* exact or approximate maximum likelihood values */
2436                                                         compute_quartlklhds(a,b,c,i,&qweight[0],&qweight[1],&qweight[2], (approxqp ? APPROX : EXACT));
2437                                                         
2438                                                         if (savequartlh_optn) {
2439                                                                 if (saveqlhbin_optn)
2440                                                                         fwrite(qweight, sizeof(double), 3, lhfp);
2441                                                                 else
2442                                                                         fprintf(lhfp, "(%d,%d,%d,%d)\t%f\t%f\t%f\n", a, b, c, i, 
2443                                                                         qweight[0], qweight[1], qweight[2]); 
2444                                                         }
2445
2446                                                         /* sort in descending order */
2447                                                         sort3doubles(qweight, qworder);
2448
2449                                                         if (usebestq_optn) {
2450                                                                 sqorder[2] = 2;
2451                                                                 discreteweight[sqorder[2]] = treebits[qworder[0]];
2452                                                                 if (qweight[qworder[0]] == qweight[qworder[1]]) {
2453                                                                    discreteweight[sqorder[2]] = discreteweight[sqorder[2]] || treebits[qworder[1]];
2454                                                                    if (qweight[qworder[1]] == qweight[qworder[2]]) {
2455                                                                       discreteweight[sqorder[2]] = discreteweight[sqorder[2]] || treebits[qworder[2]];
2456                                                                       discreteweight[sqorder[2]] = 7;
2457                                                                    } 
2458                                                                 }
2459                                                         } else {
2460
2461                                                                 /* compute Bayesian weights */
2462                                                                 qweight[qworder[1]] = exp(qweight[qworder[1]]-qweight[qworder[0]]);
2463                                                                 qweight[qworder[2]] = exp(qweight[qworder[2]]-qweight[qworder[0]]);
2464                                                                 qweight[qworder[0]] = 1.0;
2465                                                                 temp = qweight[0] + qweight[1] + qweight[2];
2466                                                                 qweight[0] = qweight[0]/temp;
2467                                                                 qweight[1] = qweight[1]/temp;
2468                                                                 qweight[2] = qweight[2]/temp;
2469                                                                 
2470                                                                 /* square deviations */
2471                                                                 temp1 = 1.0 - qweight[qworder[0]];
2472                                                                 sqdiff[0] = temp1 * temp1 +
2473                                                                            qweight[qworder[1]] * qweight[qworder[1]] +
2474                                                                            qweight[qworder[2]] * qweight[qworder[2]];
2475                                                                 discreteweight[0] = treebits[qworder[0]];
2476      
2477                                                                 temp1 = 0.5 - qweight[qworder[0]];
2478                                                                 temp2 = 0.5 - qweight[qworder[1]];
2479                                                                 sqdiff[1] = temp1 * temp1 + temp2 * temp2 +
2480                                                                            qweight[qworder[2]] * qweight[qworder[2]];           
2481                                                                 discreteweight[1] = treebits[qworder[0]] + treebits[qworder[1]];
2482                                                                    
2483                                                                 temp1 = onethird - qweight[qworder[0]];
2484                                                                 temp2 = onethird - qweight[qworder[1]];
2485                                                                 temp3 = onethird - qweight[qworder[2]];
2486                                                                 sqdiff[2] = temp1 * temp1 + temp2 * temp2 + temp3 * temp3;
2487                                                                 discreteweight[2] = (unsigned char) 7;                                                     
2488                                                         
2489                                                                 /* sort in descending order */
2490                                                                 sort3doubles(sqdiff, sqorder);
2491                                                         }
2492                                                         
2493                                                         /* determine best discrete weight */
2494                                                         writequartet(a, b, c, i, discreteweight[sqorder[2]]);
2495                                                 
2496                                                         /* counting completely unresolved quartets */
2497                                                         if (discreteweight[sqorder[2]] == 7) {
2498                                                                 badqs++;
2499                                                                 badtaxon[a]++;
2500                                                                 badtaxon[b]++;
2501                                                                 badtaxon[c]++;
2502                                                                 badtaxon[i]++;
2503                                                                 if (show_optn) {
2504                                                                         fputid10(unresfp, a);
2505                                                                         fprintf(unresfp, "  ");
2506                                                                         fputid10(unresfp, b);
2507                                                                         fprintf(unresfp, "  ");
2508                                                                         fputid10(unresfp, c);
2509                                                                         fprintf(unresfp, "  ");
2510                                                                         fputid(unresfp, i);
2511                                                                         fprintf(unresfp, "\n");
2512                                                                 }
2513                                                         }
2514                                                         addtimes(QUARTETS, &tarr);
2515                                                 }
2516         if (savequartlh_optn) {
2517                 closefile(lhfp);
2518         }
2519         if (show_optn)
2520                 closefile(unresfp);
2521         if (mflag == 1)
2522                 FPRINTF(STDOUTFILE "\n");
2523 #       endif /* PARALLEL */
2524
2525 }
2526                                                         
2527 /* check the branching structure between the leaves (not the taxa!)
2528    A, B, C, and I (A, B, C, I don't need to be ordered). As a result,
2529    the two leaves that are closer related to each other than to leaf I
2530    are found in chooseA and chooseB. If the branching structure is
2531    not uniquely defined, ChooseA and ChooseB are chosen randomly
2532    from the possible taxa */
2533 void checkquartet(int A, int B, int C, int I)
2534 {
2535         int i, j, a, b, taxon[5], leaf[5], ipos;
2536         unsigned char qresult;
2537         int notunique = FALSE;
2538
2539         /* The relationship between leaves and taxa is defined by trueID */
2540         taxon[1] = trueID[A]; /* taxon number */
2541         leaf[1] = A;          /* leaf number  */
2542         taxon[2] = trueID[B];
2543         leaf[2] = B;
2544         taxon[3] = trueID[C];
2545         leaf[3] = C;
2546         taxon[4] = trueID[I];
2547         leaf[4] = I;
2548
2549         /* sort for taxa */
2550         /* Source: Numerical Recipes (PIKSR2.C) */
2551         for (j = 2; j <= 4; j++) {
2552                 a = taxon[j];
2553                 b = leaf[j];
2554                 i = j-1;
2555                 while (i > 0 && taxon[i] > a) {
2556                         taxon[i+1] = taxon[i];
2557                         leaf[i+1] = leaf[i];
2558                         i--;
2559                 }
2560                 taxon[i+1] = a;
2561                 leaf[i+1] = b;
2562         }
2563
2564         /* where is leaf I ? */
2565         ipos = 1;
2566         while (leaf[ipos] != I) ipos++;
2567
2568         /* look at sequence quartet */
2569         qresult = readquartet(taxon[1], taxon[2], taxon[3], taxon[4]);
2570
2571         /* chooseA and chooseB */
2572         do {    
2573                 switch (qresult) {
2574                 
2575                         /* one single branching structure */
2576                 
2577                         /* 001 */
2578                         case 1:         if (ipos == 1 || ipos == 2) {
2579                                                         chooseA = leaf[3];
2580                                                         chooseB = leaf[4];
2581                                                 } else {
2582                                                         chooseA = leaf[1];
2583                                                         chooseB = leaf[2];
2584                                                 }
2585                                                 notunique = FALSE;
2586                                                 break;
2587
2588                         /* 010 */
2589                         case 2:         if (ipos == 1 || ipos == 3) {
2590                                                         chooseA = leaf[2];
2591                                                         chooseB = leaf[4];
2592                                                 } else {
2593                                                         chooseA = leaf[1];
2594                                                         chooseB = leaf[3];
2595                                                 }
2596                                                 notunique = FALSE;
2597                                                 break;
2598
2599                         /* 100 */
2600                         case 4:         if (ipos == 1 || ipos == 4) {
2601                                                         chooseA = leaf[2];
2602                                                         chooseB = leaf[3];
2603                                                 } else {
2604                                                         chooseA = leaf[1];
2605                                                         chooseB = leaf[4];
2606                                                 }
2607                                                 notunique = FALSE;
2608                                                 break;
2609
2610                         /* two possible branching structures */         
2611
2612                         /* 011 */
2613                         case 3:         if (randominteger(2)) qresult = 1;
2614                                                 else qresult = 2;
2615                                                 notunique = TRUE;
2616                                                 break;
2617
2618                         /* 101 */
2619                         case 5:         if (randominteger(2)) qresult = 1;
2620                                                 else qresult = 4;
2621                                                 notunique = TRUE;
2622                                                 break;
2623
2624                         /* 110 */
2625                         case 6:         if (randominteger(2)) qresult = 2;
2626                                                 else qresult = 4;
2627                                                 notunique = TRUE;
2628                                                 break;
2629
2630                         /* three possible branching structures */
2631
2632                         /* 111 */
2633                         case 7:         qresult = (1 << randominteger(3)); /* 1, 2, or 4 */
2634                                                 notunique = TRUE;
2635                                                 break;
2636
2637                         default:        /* Program error [checkquartet] */
2638 #if PARALLEL
2639                                                 FPRINTF(STDOUTFILE "\n\n\n(%2d)HALT: PLEASE REPORT ERROR K-PARALLEL TO DEVELOPERS (%d,%d,%d,%d) = %ld\n\n\n", 
2640                                                 PP_Myid, taxon[1], taxon[2], taxon[3], taxon[4],
2641                                                 quart2num(taxon[1], taxon[2], taxon[3], taxon[4]));
2642 #else
2643                                                 FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR K TO DEVELOPERS\n\n\n");
2644 #endif
2645                                                 
2646                 }
2647         } while (notunique);
2648
2649         return;
2650 }
2651