5 * Part of TREE-PUZZLE 5.0 (June 2000)
7 * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer,
8 * M. Vingron, and Arndt von Haeseler
9 * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler
11 * All parts of the source except where indicated are distributed under
12 * the GNU public licence. See http://www.opensource.org for details.
16 /* Modified by Christian Zmasek to:
17 - Allow 8000 seqs (for pairwise dist. calc.).
21 !WARNING: Use ONLY together with FORESTER/RIO!
22 !For all other puposes download the excellent original!
24 last modification: 05/19/01
27 void getsizesites(FILE *ifp):
33 void readid(FILE *infp, int t):
35 for (i = 0; i < 10; i++) { -> for (i = 0; i < 26; i++) {
37 for (i = 9; i > -1; i--) { -> for (i = 25; i > -1; i--) {
39 for (j = 0; (j < 10) && (flag == TRUE); j++) -> for (j = 0; (j < 26) && (flag == TRUE); j++)
45 Identif = new_cmatrix(t, 10); -> Identif = new_cmatrix(t, 26);
47 for (j = 0; j < 10; j++) -> for (j = 0; j < 26; j++)
51 fputid10(FILE *ofp, int t):
53 for (i = 0; i < 10; i++) -> for (i = 0; i < 26; i++)
57 int fputid(FILE *ofp, int t):
59 while (Identif[t][i] != ' ' && i < 10) { -> while (Identif[t][i] != ' ' && i < 26) {
76 /******************************************************************************/
78 /******************************************************************************/
80 /* read ten characters of current line as identifier */
81 void readid(FILE *infp, int t)
85 for (i = 0; i < 26; i++) { /* CZ 05/19/01 */
87 if (ci == EOF || !isprint(ci)) {
88 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (no name for sequence %d)\n\n\n", t+1);
91 Identif[t][i] = (char) ci;
93 /* convert leading blanks in taxon name to underscores */
95 for (i = 25; i > -1; i--) { /* CZ 05/19/01 */
97 if (Identif[t][i] != ' ') flag = TRUE;
99 if (Identif[t][i] == ' ') Identif[t][i] = '_';
102 /* check whether this name is already used */
103 for (i = 0; i < t; i++) { /* compare with all other taxa */
104 flag = TRUE; /* assume identity */
105 for (j = 0; (j < 26) && (flag == TRUE); j++) /* CZ 05/19/01 */
106 if (Identif[t][j] != Identif[i][j])
109 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (multiple occurence of sequence name '");
111 FPRINTF(STDOUTFILE "')\n\n\n");
117 /* read next allowed character */
118 char readnextcharacter(FILE *ifp, int notu, int nsite)
122 /* ignore blanks and control characters except newline */
124 if (fscanf(ifp, "%c", &c) != 1) {
125 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (missing character at position %d in sequence '", nsite + 1);
126 fputid(STDOUT, notu);
127 FPRINTF(STDOUTFILE "')\n\n\n");
130 } while (c == ' ' || (iscntrl((int) c) && c != '\n'));
134 /* skip rest of the line */
135 void skiprestofline(FILE* ifp, int notu, int nsite)
139 /* read chars until the first newline */
143 FPRINTF(STDOUTFILE "Unable to proceed (missing newline at position %d in sequence '", nsite + 1);
144 fputid(STDOUT, notu);
145 FPRINTF(STDOUTFILE "')\n\n\n");
148 } while ((char) ci != '\n');
151 /* skip control characters and blanks */
152 void skipcntrl(FILE *ifp, int notu, int nsite)
156 /* read over all control characters and blanks */
160 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (missing character at position %d in sequence '", nsite + 1);
161 fputid(STDOUT, notu);
162 FPRINTF(STDOUTFILE "')\n\n\n");
165 } while (iscntrl(ci) || (char) ci == ' ');
166 /* go one character back */
167 if (ungetc(ci, ifp) == EOF) {
168 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (positioning error at position %d in sequence '", nsite + 1);
169 fputid(STDOUT, notu);
170 FPRINTF(STDOUTFILE "')\n\n\n");
175 /* read sequences of one data set */
176 void getseqs(FILE *ifp)
178 int notu, nsite, endofline, linelength, i;
181 seqchars = new_cmatrix(Maxspc, Maxseqc);
182 /* read all characters */
183 nsite = 0; /* next site to be read */
184 while (nsite < Maxseqc) {
185 /* read first taxon */
187 /* go to next true line */
188 skiprestofline(ifp, notu, nsite);
189 skipcntrl(ifp, notu, nsite);
190 if (nsite == 0) readid(ifp, notu);
194 c = readnextcharacter(ifp, notu, nsite + linelength);
195 if (c == '\n') endofline = TRUE;
197 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (invalid character '.' at position ");
198 FPRINTF(STDOUTFILE "%d in first sequence)\n\n\n", nsite + linelength + 1);
200 } else if (nsite + linelength < Maxseqc) {
201 /* change to upper case */
202 seqchars[notu][nsite + linelength] = (char) toupper((int) c);
206 skiprestofline(ifp, notu, nsite + linelength);
208 } while (!endofline);
209 if (linelength == 0) {
210 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (line with length 0 at position %d in sequence '", nsite + 1);
211 fputid(STDOUT, notu);
212 FPRINTF(STDOUTFILE "')\n\n\n");
215 /* read other taxa */
216 for (notu = 1; notu < Maxspc; notu++) {
217 /* go to next true line */
218 if (notu != 1) skiprestofline(ifp, notu, nsite);
219 skipcntrl(ifp, notu, nsite);
220 if (nsite == 0) readid(ifp, notu);
221 for (i = nsite; i < nsite + linelength; i++) {
222 c = readnextcharacter(ifp, notu, i);
223 if (c == '\n') { /* too short */
224 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (line to short at position %d in sequence '", i + 1);
225 fputid(STDOUT, notu);
226 FPRINTF(STDOUTFILE "')\n\n\n");
228 } else if (c == '.') {
229 seqchars[notu][i] = seqchars[0][i];
231 /* change to upper case */
232 seqchars[notu][i] = (char) toupper((int) c);
236 nsite = nsite + linelength;
240 /* initialize identifer array */
245 Identif = new_cmatrix(t, 26); /* CZ 05/19/01 */
246 for (i = 0; i < t; i++)
247 for (j = 0; j < 26; j++) /* CZ 05/19/01 */
251 /* print identifier of specified taxon in full 10 char length */
252 void fputid10(FILE *ofp, int t)
256 for (i = 0; i < 26; i++) fputc(Identif[t][i], ofp); /* CZ 05/19/01 */
259 /* print identifier of specified taxon up to first space */
260 int fputid(FILE *ofp, int t)
265 while (Identif[t][i] != ' ' && i < 26) { /* CZ 05/19/01 */
266 fputc(Identif[t][i], ofp);
272 /* read first line of sequence data set */
273 void getsizesites(FILE *ifp)
275 if (fscanf(ifp, "%d", &Maxspc) != 1) {
276 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (missing number of sequences)\n\n\n");
279 if (fscanf(ifp, "%d", &Maxseqc) != 1) {
280 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (missing number of sites)\n\n\n");
285 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (less than 4 sequences)\n\n\n");
288 if (Maxspc > 8000) { /* CZ 05/19/01 */
289 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (more than 8000 sequences)\n\n\n");
293 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (no sequence sites)\n\n\n");
296 Maxbrnch = 2*Maxspc - 3;
299 /* read one data set - PHYLIP interleaved */
300 void getdataset(FILE *ifp)
306 /* guess data type */
309 uli numnucs, numchars, numbins;
313 /* count A, C, G, T, U, N */
317 for (notu = 0; notu < Maxspc; notu++)
318 for (nsite = 0; nsite < Maxseqc; nsite++) {
319 c = seqchars[notu][nsite];
320 if (c == 'A' || c == 'C' || c == 'G' ||
321 c == 'T' || c == 'U' || c == 'N') numnucs++;
322 if (c != '-' && c != '?') numchars++;
323 if (c == '0' || c == '1') numbins++;
325 if (numchars == 0) numchars = 1;
326 /* more than 85 % frequency means nucleotide data */
327 if ((double) numnucs / (double) numchars > 0.85) return 0;
328 else if ((double) numbins / (double) numchars > 0.2) return 2;
332 /* translate characters into format used by ML engine */
333 void translatedataset()
340 /* determine Maxsite - number of ML sites per taxon */
341 if (data_optn == 0 && SH_optn) {
343 Maxsite = Maxseqc / 3;
345 Maxsite = Maxseqc / 2; /* assume doublets */
349 if (data_optn == 0 && (Maxsite % 3) == 0 && !SH_optn) {
350 if (codon_optn == 1 || codon_optn == 2 || codon_optn == 3)
351 Maxsite = Maxsite / 3; /* only one of the three codon positions */
353 Maxsite = 2*(Maxsite / 3); /* 1st + 2nd codon positions */
357 if (Seqchar != NULL) free_cmatrix(Seqchar);
358 Seqchar = new_cmatrix(Maxspc, Maxsite);
361 if (data_optn == 0 && SH_optn)
362 code = new_cvector(2);
364 code = new_cvector(1);
366 /* decode characters */
367 if (data_optn == 0 && SH_optn) { /* SH doublets */
369 for (notu = 0; notu < Maxspc; notu++) {
370 for (sn = 0; sn < Maxsite; sn++) {
371 for (co = 0; co < 2; co++) {
373 c = seqchars[notu][sn*3 + co];
375 c = seqchars[notu][sn*2 + co];
378 Seqchar[notu][sn] = code2int(code);
382 } else if (!(data_optn == 0 && (Maxseqc % 3) == 0)) { /* use all */
384 for (notu = 0; notu < Maxspc; notu++) {
385 for (sn = 0; sn < Maxsite; sn++) {
386 code[0] = seqchars[notu][sn];
387 Seqchar[notu][sn] = code2int(code);
391 } else { /* codons */
393 for (notu = 0; notu < Maxspc; notu++) {
394 for (sn = 0; sn < Maxsite; sn++) {
395 if (codon_optn == 1 || codon_optn == 2 || codon_optn == 3)
396 code[0] = seqchars[notu][sn*3+codon_optn-1];
397 else if (codon_optn == 4) {
399 code[0] = seqchars[notu][(sn/2)*3];
401 code[0] = seqchars[notu][((sn-1)/2)*3+1];
403 code[0] = seqchars[notu][sn];
404 Seqchar[notu][sn] = code2int(code);
412 /* estimate mean base frequencies from translated data set */
413 void estimatebasefreqs()
418 tpmradix = gettpmradix();
420 if (Freqtpm != NULL) free_dvector(Freqtpm);
421 Freqtpm = new_dvector(tpmradix);
423 if (Basecomp != NULL) free_imatrix(Basecomp);
424 Basecomp = new_imatrix(Maxspc, tpmradix);
426 gene = (uli *) malloc((unsigned) ((tpmradix + 1) * sizeof(uli)));
427 if (gene == NULL) maerror("gene in estimatebasefreqs");
429 for (i = 0; i < tpmradix + 1; i++) gene[i] = 0;
430 for (i = 0; i < Maxspc; i++)
431 for (j = 0; j < tpmradix; j++) Basecomp[i][j] = 0;
432 for (i = 0; i < Maxspc; i++)
433 for (j = 0; j < Maxsite; j++) {
434 gene[(int) Seqchar[i][j]]++;
435 if (Seqchar[i][j] != tpmradix) Basecomp[i][(int) Seqchar[i][j]]++;
438 all = Maxspc * Maxsite - gene[tpmradix];
439 if (all != 0) { /* normal case */
440 for (i = 0; i < tpmradix; i++)
441 Freqtpm[i] = (double) gene[i] / (double) all;
442 } else { /* pathological case with no unique character in data set */
443 for (i = 0; i < tpmradix; i++)
444 Freqtpm[i] = 1.0 / (double) tpmradix;
452 /* guess model of substitution */
455 double c1, c2, c3, c4, c5, c6;
464 blosum62_optn = FALSE;
473 if (data_optn == 1) { /* amino acids */
475 /* chi2 fit to amino acid frequencies */
478 a = new_dmatrix(20,20);
479 /* chi2 distance Dayhoff */
482 for (i = 0; i < 20; i++)
483 c1 = c1 + (Freqtpm[i]-f[i])*(Freqtpm[i]-f[i]);
484 /* chi2 distance JTT */
487 for (i = 0; i < 20; i++)
488 c2 = c2 + (Freqtpm[i]-f[i])*(Freqtpm[i]-f[i]);
489 /* chi2 distance mtREV */
492 for (i = 0; i < 20; i++)
493 c3 = c3 + (Freqtpm[i]-f[i])*(Freqtpm[i]-f[i]);
494 /* chi2 distance VT */
497 for (i = 0; i < 20; i++)
498 c4 = c4 + (Freqtpm[i]-f[i])*(Freqtpm[i]-f[i]);
499 /* chi2 distance WAG */
502 for (i = 0; i < 20; i++)
503 c5 = c5 + (Freqtpm[i]-f[i])*(Freqtpm[i]-f[i]);
504 /* chi2 distance cpREV */
507 for (i = 0; i < 20; i++)
508 c6 = c6 + (Freqtpm[i]-f[i])*(Freqtpm[i]-f[i]);
514 if ((c1 < c2) && (c1 < c3) && (c1 < c4) && (c1 < c5)) {
522 FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on nuclear DNA)\n");
524 if ((c2 < c3) && (c2 < c4) && (c2 < c5)) {
532 FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on nuclear DNA)\n");
534 if ((c3 < c4) && (c3 < c5)) {
542 FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on mtDNA)\n");
552 FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on nuclear DNA)\n");
561 FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on nuclear DNA)\n");
562 } /* if c4 else c5 */
563 } /* if c3 else c4 */
569 if ((c1 < c2) && (c1 < c3) && (c1 < c4) && (c1 < c5) && (c1 < c6)) {
577 FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on nuclear DNA)\n");
579 if ((c2 < c3) && (c2 < c4) && (c2 < c5) && (c2 < c6)) {
587 FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on nuclear DNA)\n");
589 if ((c3 < c4) && (c3 < c5) && (c3 < c6)) {
597 FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on mtDNA)\n");
599 if ((c4 < c5) && (c4 < c6)) {
607 FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on nuclear DNA)\n");
617 FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on nuclear DNA)\n");
627 FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on cpDNA)\n");
628 } /* if c5 else c6 */
629 } /* if c4 else c5 */
630 } /* if c3 else c4 */
635 } else if (data_optn == 0) {
636 FPRINTF(STDOUTFILE "(consists very likely of nucleotides)\n");
638 FPRINTF(STDOUTFILE "(consists very likely of binary state data)\n");
643 /******************************************************************************/
644 /* functions for representing and building puzzling step trees */
645 /******************************************************************************/
647 /* initialize tree with the following starting configuration
659 /* allocate the memory for the whole tree */
661 /* allocate memory for vector with all the edges of the tree */
662 edge = (ONEEDGE *) calloc(Maxbrnch, sizeof(ONEEDGE) );
663 if (edge == NULL) maerror("edge in inittree");
665 /* allocate memory for vector with edge numbers of leaves */
666 edgeofleaf = (int *) calloc(Maxspc, sizeof(int) );
667 if (edgeofleaf == NULL) maerror("edgeofleaf in inittree");
669 /* allocate memory for all the edges the edge map */
670 for (i = 0; i < Maxbrnch; i++) {
671 edge[i].edgemap = (int *) calloc(Maxbrnch, sizeof(int) );
672 if (edge[i].edgemap == NULL) maerror("edgemap in inittree");
675 /* number all edges */
676 for (i = 0; i < Maxbrnch; i++) edge[i].numedge = i;
678 /* initialize tree */
684 (edge[0].edgemap)[0] = 0; /* you are on the right edge */
685 (edge[0].edgemap)[1] = 4; /* go down left for leaf 1 */
686 (edge[0].edgemap)[2] = 5; /* go down right for leaf 2 */
687 (edge[1].edgemap)[0] = 1; /* go up for leaf 0 */
688 (edge[1].edgemap)[1] = 0; /* you are on the right edge */
689 (edge[1].edgemap)[2] = 3; /* go up/down right for leaf 2 */
690 (edge[2].edgemap)[0] = 1; /* go up for leaf 0 */
691 (edge[2].edgemap)[1] = 2; /* go up/down left for leaf 1 */
692 (edge[2].edgemap)[2] = 0; /* you are on the right edge */
694 /* interconnection */
696 edge[0].downleft = &edge[1];
697 edge[0].downright = &edge[2];
698 edge[1].up = &edge[0];
699 edge[1].downleft = NULL;
700 edge[1].downright = NULL;
701 edge[2].up = &edge[0];
702 edge[2].downleft = NULL;
703 edge[2].downright = NULL;
705 /* edges of leaves */
711 /* add next leaf on the specified edge */
712 void addnextleaf(int dockedge)
716 if (dockedge >= nextedge) {
717 /* Trying to add leaf nextleaf to nonexisting edge dockedge */
718 FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR F TO DEVELOPERS\n\n\n");
722 if (nextleaf >= Maxspc) {
723 /* Trying to add leaf nextleaf to a tree with Maxspc leaves */
724 FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR G TO DEVELOPERS\n\n\n");
728 /* necessary change in edgeofleaf if dockedge == edgeofleaf[0] */
729 if (edgeofleaf[0] == dockedge) edgeofleaf[0] = nextedge;
731 /* adding nextedge to the tree */
732 edge[nextedge].up = edge[dockedge].up;
733 edge[nextedge].downleft = &edge[dockedge];
734 edge[nextedge].downright = &edge[nextedge+1];
735 edge[dockedge].up = &edge[nextedge];
737 if (edge[nextedge].up != NULL) {
738 if ( ((edge[nextedge].up)->downleft) == &edge[dockedge] )
739 (edge[nextedge].up)->downleft = &edge[nextedge];
741 (edge[nextedge].up)->downright = &edge[nextedge];
744 /* adding nextedge + 1 to the tree */
745 edge[nextedge+1].up = &edge[nextedge];
746 edge[nextedge+1].downleft = NULL;
747 edge[nextedge+1].downright = NULL;
748 edgeofleaf[nextleaf] = nextedge+1;
750 /* the two new edges get info about the old edges */
752 for (i = 0; i < nextedge; i++) {
753 switch ( (edge[dockedge].edgemap)[i] ) {
755 /* down right changes to down left */
756 case 5: (edge[nextedge].edgemap)[i] = 4;
759 /* null changes to down left */
760 case 0: (edge[nextedge].edgemap)[i] = 4;
763 default: (edge[nextedge].edgemap)[i] =
764 (edge[dockedge].edgemap)[i];
770 for (i = 0; i < nextedge; i++) {
771 switch ( (edge[dockedge].edgemap)[i] ) {
773 /* up/down left changes to up */
774 case 2: (edge[nextedge+1].edgemap)[i] = 1;
777 /* up/down right changes to up */
778 case 3: (edge[nextedge+1].edgemap)[i] = 1;
781 /* down left changes to up/down left */
782 case 4: (edge[nextedge+1].edgemap)[i] = 2;
785 /* down right changes to up/down left */
786 case 5: (edge[nextedge+1].edgemap)[i] = 2;
789 /* null changes to up/down left */
790 case 0: (edge[nextedge+1].edgemap)[i] = 2;
794 default: (edge[nextedge+1].edgemap)[i] =
795 (edge[dockedge].edgemap)[i];
801 for (i = 0; i < nextedge; i++) {
802 switch ( (edge[dockedge].edgemap)[i] ) {
804 /* up/down right changes to up */
805 case 3: (edge[dockedge].edgemap)[i] = 1;
808 /* up/down left changes to up */
809 case 2: (edge[dockedge].edgemap)[i] = 1;
816 /* all edgemaps are updated for the two new edges */
818 (edge[nextedge].edgemap)[nextedge] = 0;
819 (edge[nextedge].edgemap)[nextedge+1] = 5; /* down right */
822 (edge[nextedge+1].edgemap)[nextedge] = 1; /* up */
823 (edge[nextedge+1].edgemap)[nextedge+1] = 0;
825 /* all other edges */
826 for (i = 0; i < nextedge; i++) {
827 (edge[i].edgemap)[nextedge] = (edge[i].edgemap)[dockedge];
828 (edge[i].edgemap)[nextedge+1] = (edge[i].edgemap)[dockedge];
831 /* an extra for dockedge */
832 (edge[dockedge].edgemap)[nextedge] = 1; /* up */
833 (edge[dockedge].edgemap)[nextedge+1] = 3; /* up/down right */
836 nextedge = nextedge + 2;
840 /* free memory (to be called after inittree) */
845 for (i = 0; i < 2 * Maxspc - 3; i++) free(edge[i].edgemap);
850 /* writes OTU sitting on edge ed */
851 void writeOTU(FILE *outfp, int ed)
855 /* test whether we are on a leaf */
856 if (edge[ed].downright == NULL && edge[ed].downleft == NULL) {
857 for (i = 1; i < nextleaf; i++) {
858 if (edgeofleaf[i] == ed) { /* i is the leaf of ed */
859 column += fputid(outfp, trueID[i]);
865 /* we are NOT on a leaf */
868 writeOTU(outfp, edge[ed].downleft->numedge);
874 fprintf(outfp, "\n ");
876 writeOTU(outfp, edge[ed].downright->numedge);
882 void writetree(FILE *outfp)
886 column += fputid(outfp, trueID[0]) + 3;
888 writeOTU(outfp, edge[edgeofleaf[0]].downleft->numedge);
892 writeOTU(outfp, edge[edgeofleaf[0]].downright->numedge);
893 fprintf(outfp, ");\n");
897 /* clear all edgeinfos */
902 for (i = 0; i < nextedge; i++)
903 edge[i].edgeinfo = 0;
904 } /* resetedgeinfo */
906 /* increment all edgeinfo between leaf A and B */
907 void incrementedgeinfo(int A, int B)
909 int curredge, finaledge, nextstep;
913 finaledge = edgeofleaf[B];
915 curredge = edgeofleaf[A];
916 edge[curredge].edgeinfo = edge[curredge].edgeinfo + 1;
918 while (curredge != finaledge) {
919 nextstep = (edge[curredge].edgemap)[finaledge];
923 case 1: curredge = (edge[curredge].up)->numedge;
927 case 2: curredge = ((edge[curredge].up)->downleft)->numedge;
931 case 3: curredge = ((edge[curredge].up)->downright)->numedge;
935 case 4: curredge = (edge[curredge].downleft)->numedge;
939 case 5: curredge = (edge[curredge].downright)->numedge;
943 edge[curredge].edgeinfo = edge[curredge].edgeinfo + 1;
945 } /* incrementedgeinfo */
947 /* checks which edge has the lowest edgeinfo
948 if there are several edges with the same lowest edgeinfo,
949 one of them will be selected randomly */
950 void minimumedgeinfo()
952 int i, k, howmany, randomnum;
956 mininfo = edge[0].edgeinfo;
957 for (i = 1; i < nextedge; i++)
958 if (edge[i].edgeinfo <= mininfo) {
959 if (edge[i].edgeinfo == mininfo) {
963 mininfo = edge[i].edgeinfo;
968 if (howmany > 1) { /* draw random edge */
969 randomnum = randominteger(howmany) + 1; /* 1 to howmany */
971 for (k = 0; k < randomnum; k++) {
974 } while (edge[i].edgeinfo != mininfo);
978 } /* minimumedgeinfo */
983 /*******************************************/
985 /*******************************************/
987 /* compute address of the 4 int (sort key) in the 4 int node */
988 int ct_sortkeyaddr(int addr)
999 /* compute address of the next edge pointer in a 4 int node (0->1->2->0) */
1000 int ct_nextedgeaddr(int addr)
1004 if ( a == 2 ) { res = addr - 2; }
1005 else { res = addr + 1; }
1012 /* compute address of 1st edge of a 4 int node from node number */
1013 int ct_1stedge(int node)
1023 /* compute address of 2nd edge of a 4 int node from node number */
1024 int ct_2ndedge(int node)
1034 /* compute address of 3rd edge of a 4 int node from node number */
1035 int ct_3rdedge(int node)
1045 /* check whether node 'node' is a leaf (2nd/3rd edge pointer = -1) */
1046 int ct_isleaf(int node, int *ctree)
1048 return (ctree[ct_3rdedge(node)] < 0);
1054 /* compute node number of 4 int node from an edge addr. */
1055 int ct_addr2node(int addr)
1059 res = (int) ((addr - a) / 4);
1066 /* print graph pointers for checking */
1067 void printctree(int *ctree)
1070 for (n=0; n < 2*Maxspc; n++) {
1071 printf("n[%3d] = (%3d.%2d, %3d.%2d, %3d.%2d | %3d)\n", n,
1072 (int) ctree[ct_1stedge(n)]/4,
1073 (int) ctree[ct_1stedge(n)]%4,
1074 (int) ctree[ct_2ndedge(n)]/4,
1075 (int) ctree[ct_2ndedge(n)]%4,
1076 (int) ctree[ct_3rdedge(n)]/4,
1077 (int) ctree[ct_3rdedge(n)]%4,
1078 ctree[ct_3rdedge(n)+1]);
1086 /* allocate memory for ctree 3 ints pointer plus 1 check byte */
1092 snodes = (int *) malloc(4 * 2 * Maxspc * sizeof(int));
1093 if (snodes == NULL) maerror("snodes in copytree");
1095 for (n=0; n<(4 * 2 * Maxspc); n++) {
1104 /* free memory of a tree for sorting */
1105 void freectree(int **snodes)
1114 /* copy subtree recursively */
1115 void copyOTU(int *ctree, /* tree array struct */
1116 int *ct_nextnode, /* next free node */
1117 int ct_curredge, /* currende edge to add subtree */
1118 int *ct_nextleaf, /* next free leaf (0-maxspc) */
1119 int ed) /* edge in puzzling step tree */
1121 int i, nextcurredge;
1123 /* test whether we are on a leaf */
1124 if (edge[ed].downright == NULL && edge[ed].downleft == NULL) {
1125 for (i = 1; i < nextleaf; i++) {
1126 if (edgeofleaf[i] == ed) { /* i is the leaf of ed */
1127 nextcurredge = ct_1stedge(*ct_nextleaf);
1128 ctree[ct_curredge] = nextcurredge;
1129 ctree[nextcurredge] = ct_curredge;
1130 ctree[ct_sortkeyaddr(nextcurredge)] = trueID[i];
1137 /* we are NOT on a leaf */
1138 nextcurredge = ct_1stedge(*ct_nextnode);
1139 ctree[ct_curredge] = nextcurredge;
1140 ctree[nextcurredge] = ct_curredge;
1142 nextcurredge = ct_nextedgeaddr(nextcurredge);
1143 copyOTU(ctree, ct_nextnode, nextcurredge,
1144 ct_nextleaf, edge[ed].downleft->numedge);
1146 nextcurredge = ct_nextedgeaddr(nextcurredge);
1147 copyOTU(ctree, ct_nextnode, nextcurredge,
1148 ct_nextleaf, edge[ed].downright->numedge);
1154 /* copy treestructure to sorting structure */
1155 void copytree(int *ctree)
1161 ct_nextnode = Maxspc;
1162 ct_curredge = ct_1stedge(ct_nextnode);
1165 ctree[ct_1stedge(0)] = ct_curredge;
1166 ctree[ct_curredge] = ct_1stedge(0);
1167 ctree[ct_sortkeyaddr(0)] = trueID[0];
1171 ct_curredge = ct_nextedgeaddr(ct_curredge);
1172 copyOTU(ctree, &ct_nextnode, ct_curredge,
1173 &ct_nextleaf, edge[edgeofleaf[0]].downleft->numedge);
1175 ct_curredge = ct_nextedgeaddr(ct_curredge);
1176 copyOTU(ctree, &ct_nextnode, ct_curredge,
1177 &ct_nextleaf, edge[edgeofleaf[0]].downright->numedge);
1183 /* sort subtree from edge recursively by indices */
1184 int sortOTU(int edge, int *ctree)
1190 if (ctree[ct_2ndedge((int) (edge / 4))] < 0)
1191 return ctree[ct_sortkeyaddr(edge)];
1193 edge1 = ctree[ct_nextedgeaddr(edge)];
1194 edge2 = ctree[ct_nextedgeaddr(ct_nextedgeaddr(edge))];
1196 /* printf ("visiting [%5d] -> [%5d], [%5d]\n", edge, edge1, edge2); */
1197 /* printf ("visiting [%2d.%2d] -> [%2d.%2d], [%2d.%2d]\n",
1198 (int)(edge/4), edge%4, (int)(edge1/4), edge1%4,
1199 (int)(edge2/4), edge2%4); */
1201 key1 = sortOTU(edge1, ctree);
1202 key2 = sortOTU(edge2, ctree);
1205 tempedge = ctree[ctree[edge1]];
1206 ctree[ctree[edge1]] = ctree[ctree[edge2]];
1207 ctree[ctree[edge2]] = tempedge;
1208 tempedge = ctree[edge1];
1209 ctree[edge1] = ctree[edge2];
1210 ctree[edge2] = tempedge;
1211 ctree[ct_sortkeyaddr(edge)] = key2;
1214 ctree[ct_sortkeyaddr(edge)] = key1;
1216 return ctree[ct_sortkeyaddr(edge)];
1222 /* sort ctree recursively by indices */
1223 int sortctree(int *ctree)
1225 int n, startnode=-1;
1226 for(n=0; n<Maxspc; n++) {
1227 if (ctree[ct_sortkeyaddr(n*4)] == 0)
1230 sortOTU(ctree[startnode * 4], ctree);
1237 /* print recursively subtree of edge of sorted tree ctree */
1238 void printfsortOTU(int edge, int *ctree)
1242 if (ctree[ct_2ndedge((int) (edge / 4))] < 0) {
1243 printf("%d", ctree[ct_sortkeyaddr(edge)]);
1247 edge1 = ctree[ct_nextedgeaddr(edge)];
1248 edge2 = ctree[ct_nextedgeaddr(ct_nextedgeaddr(edge))];
1251 printfsortOTU(edge1, ctree);
1253 printfsortOTU(edge2, ctree);
1261 /* print recursively sorted tree ctree */
1262 int printfsortctree(int *ctree)
1264 int n, startnode=-1;
1265 for(n=0; n<Maxspc; n++) {
1266 if (ctree[ct_sortkeyaddr(n*4)] == 0)
1269 printf ("(%d,", ctree[ct_sortkeyaddr(startnode*4)]);
1270 printfsortOTU(ctree[startnode * 4], ctree);
1278 /* print recursively subtree of edge of sorted tree ctree to string */
1279 void sprintfOTU(char *str, int *len, int edge, int *ctree)
1283 if (ctree[ct_2ndedge((int) (edge / 4))] < 0) {
1284 *len+=sprintf(&(str[*len]), "%d", ctree[ct_sortkeyaddr(edge)]);
1288 edge1 = ctree[ct_nextedgeaddr(edge)];
1289 edge2 = ctree[ct_nextedgeaddr(ct_nextedgeaddr(edge))];
1291 sprintf(&(str[*len]), "(");
1293 sprintfOTU(str, len, edge1, ctree);
1294 sprintf(&(str[*len]), ",");
1296 sprintfOTU(str, len, edge2, ctree);
1297 sprintf(&(str[*len]), ")");
1303 /* print recursively sorted tree ctree to string */
1304 char *sprintfctree(int *ctree, int strglen)
1311 treestr = (char *) malloc(strglen * sizeof(char));
1313 for(n=0; n<Maxspc; n++) {
1314 if (ctree[ct_sortkeyaddr(n*4)] == 0)
1317 len+=sprintf (&(tmpptr[len]), "(%d,", ctree[ct_sortkeyaddr(startnode*4)]);
1318 sprintfOTU(tmpptr, &len, ctree[startnode * 4], ctree);
1319 len+=sprintf (&(tmpptr[len]), ");");
1327 /***********************************************/
1328 /* establish and handle a list of sorted trees */
1329 /***********************************************/
1333 /* initialize structure */
1334 treelistitemtype *inittreelist(int *treenum)
1343 /* malloc new tree list item */
1344 treelistitemtype *gettreelistitem()
1346 treelistitemtype *tmpptr;
1347 tmpptr = (treelistitemtype *)malloc(sizeof(treelistitemtype));
1348 if (tmpptr == NULL) maerror("item of intermediate tree stuctures");
1349 (*tmpptr).pred = NULL;
1350 (*tmpptr).succ = NULL;
1351 (*tmpptr).tree = NULL;
1352 (*tmpptr).count = 0;
1353 (*tmpptr).idx = itemcount++;
1359 /* free whole tree list */
1360 void freetreelist(treelistitemtype **list,
1364 treelistitemtype *current;
1365 treelistitemtype *next;
1367 while (current != NULL) {
1368 next = (*current).succ;
1369 if ((*current).tree != NULL) {
1370 free ((*current).tree);
1371 (*current).tree = NULL;
1379 } /* freetreelist */
1384 /* add tree to the tree list */
1385 treelistitemtype *addtree2list(char **tree, /* sorted tree string */
1386 int numtrees, /* how many occurred, e.g. in parallel */
1387 treelistitemtype **list, /* addr. of tree list */
1391 treelistitemtype *tmpptr = NULL;
1392 treelistitemtype *newptr = NULL;
1396 if ((*list == NULL) || (numitems == 0)) {
1397 newptr = gettreelistitem();
1398 (*newptr).tree = *tree;
1400 (*newptr).id = *numitems;
1401 (*newptr).count = numtrees;
1408 result = strcmp( (*tmpptr).tree, *tree);
1410 free(*tree); *tree = NULL;
1411 (*tmpptr).count += numtrees;
1412 *numsum += numtrees;
1415 } else { if (result < 0) {
1416 if ((*tmpptr).succ != NULL)
1417 tmpptr = (*tmpptr).succ;
1419 newptr = gettreelistitem();
1420 (*newptr).tree = *tree;
1422 (*newptr).id = *numitems;
1423 (*newptr).count = numtrees;
1424 (*newptr).pred = tmpptr;
1425 (*tmpptr).succ = newptr;
1427 *numsum += numtrees;
1430 } else { /* result < 0 */
1431 newptr = gettreelistitem();
1432 (*newptr).tree = *tree;
1434 (*newptr).id = *numitems;
1435 (*newptr).count = numtrees;
1436 (*newptr).succ = tmpptr;
1437 (*newptr).pred = (*tmpptr).pred;
1438 (*tmpptr).pred = newptr;
1439 *numsum += numtrees;
1441 if ((*newptr).pred != NULL) {
1442 (*(*newptr).pred).succ = newptr;
1448 } /* end if result < 0 */
1449 } /* end if result != 0 */
1450 } /* while searching in list */
1451 } /* if list empty, else */
1453 } /* addtree2list */
1458 /* resort list of trees by number of occurences for output */
1459 void sortbynum(treelistitemtype *list, treelistitemtype **sortlist)
1461 treelistitemtype *tmpptr = NULL;
1462 treelistitemtype *curr = NULL;
1463 treelistitemtype *next = NULL;
1466 if (list == NULL) fprintf(stderr, "Grrrrrrrrr>>>>\n");
1469 while (tmpptr != NULL) {
1470 (*tmpptr).sortnext = (*tmpptr).succ;
1471 (*tmpptr).sortlast = (*tmpptr).pred;
1472 tmpptr = (*tmpptr).succ;
1475 while (xchange > 0) {
1478 if (curr == NULL) fprintf(stderr, "Grrrrrrrrr>>>>\n");
1479 while((*curr).sortnext != NULL) {
1480 next = (*curr).sortnext;
1481 if ((*curr).count >= (*next).count)
1482 curr = (*curr).sortnext;
1484 if ((*curr).sortlast != NULL)
1485 (*((*curr).sortlast)).sortnext = next;
1486 if (*sortlist == curr)
1488 (*next).sortlast = (*curr).sortlast;
1490 if ((*next).sortnext != NULL)
1491 (*((*next).sortnext)).sortlast = curr;
1492 (*curr).sortnext = (*next).sortnext;
1494 (*curr).sortlast = next;
1495 (*next).sortnext = curr;
1506 /* print puzzling step tree stuctures for checking */
1507 void printfpstrees(treelistitemtype *list)
1510 treelistitemtype *tmpptr = NULL;
1513 while (tmpptr != NULL) {
1514 printf ("%c[%2d] %5d %s\n", ch, (*tmpptr).idx, (*tmpptr).count, (*tmpptr).tree);
1515 tmpptr = (*tmpptr).succ;
1522 /* print sorted puzzling step tree stucture with names */
1523 void fprintffullpstree(FILE *outf, char *treestr)
1528 for(n=0; treestr[n] != '\0'; n++){
1529 while(isdigit((int)treestr[n])){
1530 idnum = (10 * idnum) + ((int)treestr[n]-48);
1538 (void)fputid(outf, idnum);
1545 fprintf(outf, "%c", treestr[n]);
1552 /* print sorted puzzling step tree stuctures with names */
1553 void fprintfsortedpstrees(FILE *output,
1554 treelistitemtype *list, /* tree list */
1555 int itemnum, /* order number */
1556 int itemsum, /* number of trees */
1557 int comment, /* with statistics, or puzzle report ? */
1558 float cutoff) /* cutoff percentage */
1560 treelistitemtype *tmpptr = NULL;
1561 treelistitemtype *slist = NULL;
1565 if (list == NULL) fprintf(stderr, "Grrrrrrrrr>>>>\n");
1566 sortbynum(list, &slist);
1569 while (tmpptr != NULL) {
1570 percent = (float)(100.0 * (*tmpptr).count / itemsum);
1571 if ((cutoff == 0.0) || (cutoff <= percent)) {
1573 fprintf (output, "[ %d. %d %.2f %d %d %d ]", num++, (*tmpptr).count, percent, (*tmpptr).id, itemnum, itemsum);
1576 fprintf (output, "\n");
1577 fprintf (output, "The following tree(s) occured in more than %.2f%% of the %d puzzling steps.\n", cutoff, itemsum);
1578 fprintf (output, "The trees are orderd descending by the number of occurences.\n");
1579 fprintf (output, "\n");
1580 fprintf (output, "\n occurences ID Phylip tree\n");
1582 fprintf (output, "%2d. %5d %6.2f%% %5d ", num++, (*tmpptr).count, percent, (*tmpptr).id);
1584 fprintffullpstree(output, (*tmpptr).tree);
1585 fprintf (output, "\n");
1587 tmpptr = (*tmpptr).sortnext;
1591 fprintf (output, "\n");
1593 case 1: fprintf (output, "There were no tree topologies (out of %d) occuring with a percentage >= %.2f%% of the %d puzzling steps.\n", itemnum, cutoff, itemsum); break;
1594 case 2: fprintf (output, "There was one tree topology (out of %d) occuring with a percentage >= %.2f%%.\n", itemnum, cutoff); break;
1595 default: fprintf (output, "There were %d tree topologies (out of %d) occuring with a percentage >= %.2f%%.\n", num-1, itemnum, cutoff); break;
1597 fprintf (output, "\n");
1598 fprintf (output, "\n");
1601 } /* fprintfsortedpstrees */
1605 /* print sorted tree topologies for checking */
1606 void printfsortedpstrees(treelistitemtype *list)
1608 treelistitemtype *tmpptr = NULL;
1609 treelistitemtype *slist = NULL;
1611 sortbynum(list, &slist);
1614 while (tmpptr != NULL) {
1615 printf ("[%2d] %5d %s\n", (*tmpptr).idx, (*tmpptr).count, (*tmpptr).tree);
1616 tmpptr = (*tmpptr).sortnext;
1618 } /* printfsortedpstrees */
1621 /*******************************************/
1622 /* end of tree sorting */
1623 /*******************************************/
1627 /******************************************************************************/
1628 /* functions for computing the consensus tree */
1629 /******************************************************************************/
1631 /* prepare for consensus tree analysis */
1632 void initconsensus()
1635 biparts = new_cmatrix(Maxspc-3, Maxspc);
1636 # endif /* PARALLEL */
1638 if (Maxspc % 32 == 0)
1639 splitlength = Maxspc/32;
1640 else splitlength = (Maxspc + 32 - (Maxspc % 32))/32;
1641 numbiparts = 0; /* no pattern stored so far */
1642 maxbiparts = 0; /* no memory reserved so far */
1644 splitpatterns = NULL;
1646 splitcomp = (uli *) malloc(splitlength * sizeof(uli) );
1647 if (splitcomp == NULL) maerror("splitcomp in initconsensus");
1650 /* prototype needed for recursive function */
1651 void makepart(int i, int curribrnch);
1653 /* recursive function to get bipartitions */
1654 void makepart(int i, int curribrnch)
1658 if ( edge[i].downright == NULL ||
1659 edge[i].downleft == NULL) { /* if i is leaf */
1661 /* check out what leaf j sits on this edge i */
1662 for (j = 1; j < Maxspc; j++) {
1663 if (edgeofleaf[j] == i) {
1664 biparts[curribrnch][trueID[j]] = '*';
1668 } else { /* still on inner branch */
1669 makepart(edge[i].downleft->numedge, curribrnch);
1670 makepart(edge[i].downright->numedge, curribrnch);
1674 /* compute bipartitions of tree of current puzzling step */
1675 void computebiparts()
1677 int i, j, curribrnch;
1681 for (i = 0; i < Maxspc - 3; i++)
1682 for (j = 0; j < Maxspc; j++)
1683 biparts[i][j] = '.';
1685 for (i = 0; i < Maxbrnch; i++) {
1686 if (!( edgeofleaf[0] == i ||
1687 edge[i].downright == NULL ||
1688 edge[i].downleft == NULL) ) { /* check all inner branches */
1690 makepart(i, curribrnch);
1692 /* make sure that the root is always a '*' */
1693 if (biparts[curribrnch][outgroup] == '.') {
1694 for (j = 0; j < Maxspc; j++) {
1695 if (biparts[curribrnch][j] == '.')
1696 biparts[curribrnch][j] = '*';
1698 biparts[curribrnch][j] = '.';
1705 /* print out the bipartition n of all different splitpatterns */
1706 void printsplit(FILE *fp, uli n)
1712 for (i = 0; i < splitlength; i++) {
1713 z = splitpatterns[n*splitlength + i];
1714 for (j = 0; j < 32 && col < Maxspc; j++) {
1715 if (col % 10 == 0 && col != 0) fprintf(fp, " ");
1716 if (z & 1) fprintf(fp, ".");
1717 else fprintf(fp, "*");
1724 /* make new entries for new different bipartitions and count frequencies */
1725 void makenewsplitentries()
1727 int i, j, bpc, identical, idflag, bpsize;
1728 uli nextentry, obpc;
1730 /* where the next entry would be in splitpatterns */
1731 nextentry = numbiparts;
1733 for (bpc = 0; bpc < Maxspc - 3; bpc++) { /* for every new bipartition */
1734 /* convert bipartition into a more compact format */
1736 for (i = 0; i < splitlength; i++) {
1738 for (j = 0; j < 32; j++) {
1739 splitcomp[i] = splitcomp[i] >> 1;
1740 if (i*32 + j < Maxspc)
1741 if (biparts[bpc][i*32 + j] == '.') {
1742 /* set highest bit */
1743 splitcomp[i] = (splitcomp[i] | 2147483648UL);
1744 bpsize++; /* count the '.' */
1748 /* compare to the *old* patterns */
1750 for (obpc = 0; (obpc < numbiparts) && (!identical); obpc++) {
1751 /* compare first partition size */
1752 if (splitsizes[obpc] == bpsize) idflag = TRUE;
1753 else idflag = FALSE;
1754 /* if size is identical compare whole partition */
1755 for (i = 0; (i < splitlength) && idflag; i++)
1756 if (splitcomp[i] != splitpatterns[obpc*splitlength + i])
1758 if (idflag) identical = TRUE;
1760 if (identical) { /* if identical increase frequency */
1761 splitfreqs[2*(obpc-1)]++;
1762 } else { /* create new entry */
1763 if (nextentry == maxbiparts) { /* reserve more memory */
1764 maxbiparts = maxbiparts + 2*Maxspc;
1765 splitfreqs = (uli *) myrealloc(splitfreqs,
1766 2*maxbiparts * sizeof(uli) );
1767 /* 2x: splitfreqs contains also an index (sorting!) */
1768 if (splitfreqs == NULL) maerror("splitfreqs in makenewsplitentries");
1769 splitpatterns = (uli *) myrealloc(splitpatterns,
1770 splitlength*maxbiparts * sizeof(uli) );
1771 if (splitpatterns == NULL) maerror("splitpatterns in makenewsplitentries");
1772 splitsizes = (int *) myrealloc(splitsizes,
1773 maxbiparts * sizeof(int) );
1774 if (splitsizes == NULL) maerror("splitsizes in makenewsplitentries");
1776 splitfreqs[2*nextentry] = 1; /* frequency */
1777 splitfreqs[2*nextentry+1] = nextentry; /* index for sorting */
1778 for (i = 0; i < splitlength; i++)
1779 splitpatterns[nextentry*splitlength + i] = splitcomp[i];
1780 splitsizes[nextentry] = bpsize;
1784 numbiparts = nextentry;
1789 - every entry in consbiparts is one node of the consensus tree
1790 - for each node one has to know which taxa and which other nodes
1791 are *directly* descending from it
1792 - for every taxon/node number there is a flag that shows
1793 whether it descends from the node or not
1794 - '0' means that neither a taxon nor another node with the
1795 corresponding number decends from the node
1796 '1' means that the corresponding taxon descends from the node
1797 '2' means that the corresponding node descends from the node
1798 '3' means that the corresponding taxon and node descends from the node
1801 /* copy bipartition n of all different splitpatterns to consbiparts[k] */
1802 void copysplit(uli n, int k)
1808 for (i = 0; i < splitlength; i++) {
1809 z = splitpatterns[n*splitlength + i];
1810 for (j = 0; j < 32 && col < Maxspc; j++) {
1811 if (z & 1) consbiparts[k][col] = '1';
1812 else consbiparts[k][col] = '0';
1819 /* compute majority rule consensus tree */
1820 void makeconsensus()
1822 int i, j, k, size, subnode;
1825 /* sort bipartition frequencies */
1826 qsort(splitfreqs, numbiparts, 2*sizeof(uli), ulicmp);
1827 /* how many bipartitions are included in the consensus tree */
1829 for (i = 0; i < numbiparts && i == consincluded; i++) {
1830 if (2*splitfreqs[2*i] > Numtrial) consincluded = i + 1;
1833 /* collect all info about majority rule consensus tree */
1834 /* the +1 is due to the edge with the root */
1835 consconfid = new_ivector(consincluded + 1);
1836 conssizes = new_ivector(2*consincluded + 2);
1837 consbiparts = new_cmatrix(consincluded + 1, Maxspc);
1839 for (i = 0; i < consincluded; i++) {
1840 /* copy partition to consbiparts */
1841 copysplit(splitfreqs[2*i+1], i);
1842 /* frequency in percent (rounded to integer) */
1843 consconfid[i] = (int) floor(100.0*splitfreqs[2*i]/Numtrial + 0.5);
1844 /* size of partition */
1845 conssizes[2*i] = splitsizes[splitfreqs[2*i+1]];
1846 conssizes[2*i+1] = i;
1848 for (i = 0; i < Maxspc; i++) consbiparts[consincluded][i] = '1';
1849 consbiparts[consincluded][outgroup] = '0';
1850 consconfid[consincluded] = 100;
1851 conssizes[2*consincluded] = Maxspc - 1;
1852 conssizes[2*consincluded + 1] = consincluded;
1854 /* sort bipartitions according to cluster size */
1855 qsort(conssizes, consincluded + 1, 2*sizeof(int), intcmp);
1857 /* reconstruct consensus tree */
1858 for (i = 0; i < consincluded; i++) { /* try every node */
1859 size = conssizes[2*i]; /* size of current node */
1860 for (j = i + 1; j < consincluded + 1; j++) {
1862 /* compare only with nodes with more descendants */
1863 if (size == conssizes[2*j]) continue;
1865 /* check whether node i is a subnode of j */
1867 for (k = 0; k < Maxspc && !subnode; k++) {
1868 chari = consbiparts[ conssizes[2*i+1] ][k];
1870 charj = consbiparts[ conssizes[2*j+1] ][k];
1871 if (chari == charj || charj == '3') subnode = TRUE;
1875 /* if i is a subnode of j change j accordingly */
1877 /* remove subnode i from j */
1878 for (k = 0; k < Maxspc; k++) {
1879 chari = consbiparts[ conssizes[2*i+1] ][k];
1881 charj = consbiparts[ conssizes[2*j+1] ][k];
1883 consbiparts[ conssizes[2*j+1] ][k] = '0';
1884 else if (charj == '3') {
1886 consbiparts[ conssizes[2*j+1] ][k] = '2';
1887 else if (chari == '2')
1888 consbiparts[ conssizes[2*j+1] ][k] = '1';
1890 /* Consensus tree [1] */
1891 FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR H TO DEVELOPERS\n\n\n");
1895 /* Consensus tree [2] */
1896 FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR I TO DEVELOPERS\n\n\n");
1901 /* add link to subnode i in node j */
1902 charj = consbiparts[ conssizes[2*j+1] ][ conssizes[2*i+1] ];
1904 consbiparts[ conssizes[2*j+1] ][ conssizes[2*i+1] ] = '2';
1905 else if (charj == '1')
1906 consbiparts[ conssizes[2*j+1] ][ conssizes[2*i+1] ] = '3';
1908 /* Consensus tree [3] */
1909 FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR J TO DEVELOPERS\n\n\n");
1917 /* prototype for recursion */
1918 void writenode(FILE *treefile, int node);
1920 /* write node (writeconsensustree) */
1921 void writenode(FILE *treefile, int node)
1925 fprintf(treefile, "(");
1927 /* write descending nodes */
1929 for (i = 0; i < Maxspc; i++) {
1930 if (consbiparts[node][i] == '2' ||
1931 consbiparts[node][i] == '3') {
1932 if (first) first = FALSE;
1934 fprintf(treefile, ",");
1939 fprintf(treefile, "\n");
1942 writenode(treefile, i);
1944 /* reliability value as internal label */
1945 fprintf(treefile, "%d", consconfid[i]);
1947 column = column + 3;
1950 /* write descending taxa */
1951 for (i = 0; i < Maxspc; i++) {
1952 if (consbiparts[node][i] == '1' ||
1953 consbiparts[node][i] == '3') {
1954 if (first) first = FALSE;
1956 fprintf(treefile, ",");
1961 fprintf(treefile, "\n");
1963 column += fputid(treefile, i);
1966 fprintf(treefile, ")");
1970 /* write consensus tree */
1971 void writeconsensustree(FILE *treefile)
1976 fprintf(treefile, "(");
1977 column += fputid(treefile, outgroup) + 2;
1978 fprintf(treefile, ",");
1979 /* write descending nodes */
1981 for (i = 0; i < Maxspc; i++) {
1982 if (consbiparts[consincluded][i] == '2' ||
1983 consbiparts[consincluded][i] == '3') {
1984 if (first) first = FALSE;
1986 fprintf(treefile, ",");
1991 fprintf(treefile, "\n");
1994 writenode(treefile, i);
1996 /* reliability value as internal label */
1997 fprintf(treefile, "%d", consconfid[i]);
1999 column = column + 3;
2002 /* write descending taxa */
2003 for (i = 0; i < Maxspc; i++) {
2004 if (consbiparts[consincluded][i] == '1' ||
2005 consbiparts[consincluded][i] == '3') {
2006 if (first) first = FALSE;
2008 fprintf(treefile, ",");
2013 fprintf(treefile, "\n");
2015 column += fputid(treefile, i);
2018 fprintf(treefile, ");\n");
2021 /* prototype for recursion */
2022 void nodecoordinates(int node);
2024 /* establish node coordinates (plotconsensustree) */
2025 void nodecoordinates(int node)
2027 int i, ymin, ymax, xcoordinate;
2029 /* first establish coordinates of descending nodes */
2030 for (i = 0; i < Maxspc; i++) {
2031 if (consbiparts[node][i] == '2' ||
2032 consbiparts[node][i] == '3')
2036 /* then establish coordinates of descending taxa */
2037 for (i = 0; i < Maxspc; i++) {
2038 if (consbiparts[node][i] == '1' ||
2039 consbiparts[node][i] == '3') {
2040 /* y-coordinate of taxon i */
2041 ycortax[i] = ytaxcounter;
2042 ytaxcounter = ytaxcounter - 2;
2046 /* then establish coordinates of this node */
2047 ymin = 2*Maxspc - 2;
2050 for (i = 0; i < Maxspc; i++) {
2051 if (consbiparts[node][i] == '2' ||
2052 consbiparts[node][i] == '3') {
2053 if (ycor[i] > ymax) ymax = ycor[i];
2054 if (ycor[i] < ymin) ymin = ycor[i];
2055 if (xcor[i] > xcoordinate) xcoordinate = xcor[i];
2058 for (i = 0; i < Maxspc; i++) {
2059 if (consbiparts[node][i] == '1' ||
2060 consbiparts[node][i] == '3') {
2061 if (ycortax[i] > ymax) ymax = ycortax[i];
2062 if (ycortax[i] < ymin) ymin = ycortax[i];
2065 ycormax[node] = ymax;
2066 ycormin[node] = ymin;
2067 ycor[node] = (int) floor(0.5*(ymax + ymin) + 0.5);
2068 if (xcoordinate == 0) xcoordinate = 9;
2069 xcor[node] = xcoordinate + 4;
2072 /* prototype for recursion */
2073 void drawnode(int node, int xold);
2075 /* drawnode (plotconsensustree) */
2076 void drawnode(int node, int xold)
2081 /* first draw vertical line */
2082 for (i = ycormin[node] + 1; i < ycormax[node]; i++)
2083 treepict[xcor[node]][i] = ':';
2085 /* then draw descending nodes */
2086 for (i = 0; i < Maxspc; i++) {
2087 if (consbiparts[node][i] == '2' ||
2088 consbiparts[node][i] == '3')
2089 drawnode(i, xcor[node]);
2092 /* then draw descending taxa */
2093 for (i = 0; i < Maxspc; i++) {
2094 if (consbiparts[node][i] == '1' ||
2095 consbiparts[node][i] == '3') {
2096 treepict[xcor[node]][ycortax[i]] = ':';
2097 for (j = xcor[node] + 1; j < xsize-10; j++)
2098 treepict[j][ycortax[i]] = '-';
2099 for (j = 0; j < 10; j++)
2100 treepict[xsize-10+j][ycortax[i]] = Identif[i][j];
2104 /* then draw internal edge with consensus value */
2105 treepict[xold][ycor[node]] = ':';
2106 treepict[xcor[node]][ycor[node]] = ':';
2107 for (i = xold + 1; i < xcor[node]-3; i++)
2108 treepict[i][ycor[node]] = '-';
2109 sprintf(buf, "%d", consconfid[node]);
2110 if (consconfid[node] == 100) {
2111 treepict[xcor[node]-3][ycor[node]] = buf[0];
2112 treepict[xcor[node]-2][ycor[node]] = buf[1];
2113 treepict[xcor[node]-1][ycor[node]] = buf[2];
2115 treepict[xcor[node]-3][ycor[node]] = '-';
2116 treepict[xcor[node]-2][ycor[node]] = buf[0];
2117 treepict[xcor[node]-1][ycor[node]] = buf[1];
2121 /* plot consensus tree */
2122 void plotconsensustree(FILE *plotfp)
2124 int i, j, yroot, startree;
2126 /* star tree or no star tree */
2127 if (consincluded == 0) {
2129 consincluded = 1; /* avoids problems with malloc */
2133 /* memory for x-y-coordinates of each bipartition */
2134 xcor = new_ivector(consincluded);
2135 ycor = new_ivector(consincluded);
2136 ycormax = new_ivector(consincluded);
2137 ycormin = new_ivector(consincluded);
2138 if (startree) consincluded = 0; /* avoids problems with malloc */
2140 /* y-coordinates of each taxon */
2141 ycortax = new_ivector(Maxspc);
2142 ycortax[outgroup] = 0;
2144 /* establish coordinates */
2145 ytaxcounter = 2*Maxspc - 2;
2147 /* first establish coordinates of descending nodes */
2148 for (i = 0; i < Maxspc; i++) {
2149 if (consbiparts[consincluded][i] == '2' ||
2150 consbiparts[consincluded][i] == '3')
2154 /* then establish coordinates of descending taxa */
2155 for (i = 0; i < Maxspc; i++) {
2156 if (consbiparts[consincluded][i] == '1' ||
2157 consbiparts[consincluded][i] == '3') {
2158 /* y-coordinate of taxon i */
2159 ycortax[i] = ytaxcounter;
2160 ytaxcounter = ytaxcounter - 2;
2164 /* then establish length of root edge and size of whole tree */
2167 for (i = 0; i < Maxspc; i++) {
2168 if (consbiparts[consincluded][i] == '2' ||
2169 consbiparts[consincluded][i] == '3') {
2170 if (ycor[i] > yroot) yroot = ycor[i];
2171 if (xcor[i] > xsize) xsize = xcor[i];
2174 for (i = 0; i < Maxspc; i++) {
2175 if (consbiparts[consincluded][i] == '1' ||
2176 consbiparts[consincluded][i] == '3') {
2177 if (ycortax[i] > yroot) yroot = ycortax[i];
2180 if (xsize == 0) xsize = 9;
2181 /* size in x direction inclusive one blank on the left */
2184 /* change all x-labels so that (0,0) is down-left */
2185 for (i = 0; i < consincluded; i++)
2186 xcor[i] = xsize-1-xcor[i];
2189 treepict = new_cmatrix(xsize, 2*Maxspc-1);
2190 for (i = 0; i < xsize; i++)
2191 for (j = 0; j < 2*Maxspc-1; j++)
2192 treepict[i][j] = ' ';
2195 for (i = 1; i < yroot; i++)
2196 treepict[1][i] = ':';
2197 treepict[1][0] = ':';
2198 for (i = 2; i < xsize - 10; i++)
2199 treepict[i][0] = '-';
2200 for (i = 0; i < 10; i++)
2201 treepict[xsize-10+i][0] = Identif[outgroup][i];
2203 /* then draw descending nodes */
2204 for (i = 0; i < Maxspc; i++) {
2205 if (consbiparts[consincluded][i] == '2' ||
2206 consbiparts[consincluded][i] == '3')
2210 /* then draw descending taxa */
2211 for (i = 0; i < Maxspc; i++) {
2212 if (consbiparts[consincluded][i] == '1' ||
2213 consbiparts[consincluded][i] == '3') {
2214 treepict[1][ycortax[i]] = ':';
2215 for (j = 2; j < xsize-10; j++)
2216 treepict[j][ycortax[i]] = '-';
2217 for (j = 0; j < 10; j++)
2218 treepict[xsize-10+j][ycortax[i]] = Identif[i][j];
2223 for (i = 2*Maxspc-2; i > -1; i--) {
2224 for (j = 0; j < xsize; j++)
2225 fputc(treepict[j][i], plotfp);
2226 fputc('\n', plotfp);
2231 free_ivector(ycormax);
2232 free_ivector(ycormin);
2233 free_ivector(ycortax);
2234 free_cmatrix(treepict);
2239 /******************************************************************************/
2240 /* storing and evaluating quartet branching information */
2241 /******************************************************************************/
2245 for a quartet with the taxa a, b, c, d there are
2246 three possible binary trees:
2252 For every quartet information about its branching structure is
2253 stored. With the functions readquartet and writequartet
2254 this information can be accessed. For every quartet (a,b,c,d)
2255 with a < b < c < d (taxa) the branching information is encoded
2259 +-------------+-------------+-------------+-------------+
2260 | not used | tree 3 | tree 2 | tree 1 |
2261 +-------------+-------------+-------------+-------------+
2263 If the branching structure of the taxa corresponds to one of the
2264 three trees the corresponding bit is set. If the branching structure
2265 is unclear because two of the three trees have the same maximum
2266 likelihood value the corresponding two bits are set. If the branching
2267 structure is completely unknown all the bits are set (the highest
2268 bit is always cleared because it is not used).
2272 /* allocate memory for quartets */
2273 unsigned char *mallocquartets(int taxa)
2276 unsigned char *qinfo;
2278 /* compute number of quartets */
2279 Numquartets = (uli) taxa*(taxa-1)*(taxa-2)*(taxa-3)/24;
2280 if (Numquartets % 2 == 0) { /* even number */
2281 numch = Numquartets/2;
2282 } else { /* odd number */
2283 numch = (Numquartets + 1)/2;
2285 /* allocate memory */
2286 qinfo = (unsigned char *) malloc(numch * sizeof(unsigned char) );
2287 if (qinfo == NULL) maerror("quartetinfo in mallocquartets");
2288 for (nc = 0; nc < numch; nc++) qinfo[nc] = 0;
2292 /* free quartet memory */
2298 /* read quartet info - a < b < c < d */
2299 unsigned char readquartet(int a, int b, int c, int d)
2305 + (uli) c*(c-1)*(c-2)/6
2306 + (uli) d*(d-1)*(d-2)*(d-3)/24;
2307 if (qnum % 2 == 0) { /* even number */
2309 return (quartetinfo[qnum/2] & (unsigned char) 15);
2310 } else { /* odd number */
2312 return ((quartetinfo[(qnum-1)/2] & (unsigned char) 240)>>4);
2316 /* write quartet info - a < b < c < d, 0 <= info <= 15 */
2317 void writequartet(int a, int b, int c, int d, unsigned char info)
2323 + (uli) c*(c-1)*(c-2)/6
2324 + (uli) d*(d-1)*(d-2)*(d-3)/24;
2325 if (qnum % 2 == 0) { /* even number */
2327 quartetinfo[qnum/2] =
2328 ((quartetinfo[qnum/2] & (unsigned char) 240) |
2329 (info & (unsigned char) 15));
2330 } else { /* odd number */
2332 quartetinfo[(qnum-1)/2] =
2333 ((quartetinfo[(qnum-1)/2] & (unsigned char) 15) |
2334 ((info & (unsigned char) 15)<<4));
2339 void openfiletowrite(FILE **, char[], char[]);
2340 void closefile(FILE *);
2342 /* sorts three doubles in descending order */
2343 void sort3doubles(dvector num, ivector order)
2345 if (num[0] > num[1]) {
2346 if(num[2] > num[0]) {
2350 } else if (num[2] < num[1]) {
2360 if(num[2] > num[1]) {
2364 } else if (num[2] < num[0]) {
2376 /* checks out all possible quartets */
2377 void computeallquartets()
2381 unsigned char treebits[3];
2385 double qc2, mintogo, minutes, hours, temp;
2386 double temp1, temp2, temp3;
2387 unsigned char discreteweight[3];
2391 treebits[0] = (unsigned char) 1;
2392 treebits[1] = (unsigned char) 2;
2393 treebits[2] = (unsigned char) 4;
2395 if (show_optn) { /* list all unresolved quartets */
2396 openfiletowrite(&unresfp, UNRESOLVED, "unresolved quartet trees");
2397 fprintf(unresfp, "List of all completely unresolved quartets:\n\n");
2403 /* start timer - percentage of completed quartets */
2419 initsched(&sched, numquarts(Maxspc), PP_NumProcs-1, 4);
2420 qamount=sgss(&sched);
2421 while (qamount > 0) {
2422 if (PP_emptyslave()) {
2423 PP_RecvQuartBlock(0, &sq, &noq, quartetinfo, &apr);
2426 dest = PP_getslave();
2427 PP_SendDoQuartBlock(dest, qaddr, qamount, (approxqp ? APPROX : EXACT));
2428 qblocksent += qamount;
2430 qamount=sgss(&sched);
2432 MPI_Iprobe(MPI_ANY_SOURCE, PP_QUARTBLOCKSPECS, PP_Comm, &flag, &stat);
2434 PP_RecvQuartBlock(0, &sq, &noq, quartetinfo, &apr);
2436 MPI_Iprobe(MPI_ANY_SOURCE, PP_QUARTBLOCKSPECS, PP_Comm, &flag, &stat);
2439 while (qblocksent > 0) {
2440 PP_RecvQuartBlock(0, &sq, &noq, quartetinfo, &apr);
2444 # else /* PARALLEL */
2446 addtimes(GENERAL, &tarr);
2447 if (savequartlh_optn) {
2448 openfiletowrite(&lhfp, ALLQUARTLH, "all quartet likelihoods");
2449 if (saveqlhbin_optn) writetpqfheader(Maxspc, lhfp, 3);
2450 else writetpqfheader(Maxspc, lhfp, 4);
2453 for (i = 3; i < Maxspc; i++)
2454 for (c = 2; c < i; c++)
2455 for (b = 1; b < c; b++)
2456 for (a = 0; a < b; a++) {
2459 /* generate message every 15 minutes */
2462 if ( (time2 - time1) > 900) {
2463 /* every 900 seconds */
2464 /* percentage of completed quartets */
2466 FPRINTF(STDOUTFILE "\n");
2469 qc2 = 100.*nq/Numquartets;
2470 mintogo = (100.0-qc2) *
2471 (double) (time2-time0)/60.0/qc2;
2472 hours = floor(mintogo/60.0);
2473 minutes = mintogo - 60.0*hours;
2474 FPRINTF(STDOUTFILE "%.2f%%", qc2);
2475 FPRINTF(STDOUTFILE " completed (remaining");
2476 FPRINTF(STDOUTFILE " time: %.0f", hours);
2477 FPRINTF(STDOUTFILE " hours %.0f", minutes);
2478 FPRINTF(STDOUTFILE " minutes)\n");
2483 /* maximum likelihood values */
2485 /* exact or approximate maximum likelihood values */
2486 compute_quartlklhds(a,b,c,i,&qweight[0],&qweight[1],&qweight[2], (approxqp ? APPROX : EXACT));
2488 if (savequartlh_optn) {
2489 if (saveqlhbin_optn)
2490 fwrite(qweight, sizeof(double), 3, lhfp);
2492 fprintf(lhfp, "(%d,%d,%d,%d)\t%f\t%f\t%f\n", a, b, c, i,
2493 qweight[0], qweight[1], qweight[2]);
2496 /* sort in descending order */
2497 sort3doubles(qweight, qworder);
2499 if (usebestq_optn) {
2501 discreteweight[sqorder[2]] = treebits[qworder[0]];
2502 if (qweight[qworder[0]] == qweight[qworder[1]]) {
2503 discreteweight[sqorder[2]] = discreteweight[sqorder[2]] || treebits[qworder[1]];
2504 if (qweight[qworder[1]] == qweight[qworder[2]]) {
2505 discreteweight[sqorder[2]] = discreteweight[sqorder[2]] || treebits[qworder[2]];
2506 discreteweight[sqorder[2]] = 7;
2511 /* compute Bayesian weights */
2512 qweight[qworder[1]] = exp(qweight[qworder[1]]-qweight[qworder[0]]);
2513 qweight[qworder[2]] = exp(qweight[qworder[2]]-qweight[qworder[0]]);
2514 qweight[qworder[0]] = 1.0;
2515 temp = qweight[0] + qweight[1] + qweight[2];
2516 qweight[0] = qweight[0]/temp;
2517 qweight[1] = qweight[1]/temp;
2518 qweight[2] = qweight[2]/temp;
2520 /* square deviations */
2521 temp1 = 1.0 - qweight[qworder[0]];
2522 sqdiff[0] = temp1 * temp1 +
2523 qweight[qworder[1]] * qweight[qworder[1]] +
2524 qweight[qworder[2]] * qweight[qworder[2]];
2525 discreteweight[0] = treebits[qworder[0]];
2527 temp1 = 0.5 - qweight[qworder[0]];
2528 temp2 = 0.5 - qweight[qworder[1]];
2529 sqdiff[1] = temp1 * temp1 + temp2 * temp2 +
2530 qweight[qworder[2]] * qweight[qworder[2]];
2531 discreteweight[1] = treebits[qworder[0]] + treebits[qworder[1]];
2533 temp1 = onethird - qweight[qworder[0]];
2534 temp2 = onethird - qweight[qworder[1]];
2535 temp3 = onethird - qweight[qworder[2]];
2536 sqdiff[2] = temp1 * temp1 + temp2 * temp2 + temp3 * temp3;
2537 discreteweight[2] = (unsigned char) 7;
2539 /* sort in descending order */
2540 sort3doubles(sqdiff, sqorder);
2543 /* determine best discrete weight */
2544 writequartet(a, b, c, i, discreteweight[sqorder[2]]);
2546 /* counting completely unresolved quartets */
2547 if (discreteweight[sqorder[2]] == 7) {
2554 fputid10(unresfp, a);
2555 fprintf(unresfp, " ");
2556 fputid10(unresfp, b);
2557 fprintf(unresfp, " ");
2558 fputid10(unresfp, c);
2559 fprintf(unresfp, " ");
2561 fprintf(unresfp, "\n");
2564 addtimes(QUARTETS, &tarr);
2566 if (savequartlh_optn) {
2572 FPRINTF(STDOUTFILE "\n");
2573 # endif /* PARALLEL */
2577 /* check the branching structure between the leaves (not the taxa!)
2578 A, B, C, and I (A, B, C, I don't need to be ordered). As a result,
2579 the two leaves that are closer related to each other than to leaf I
2580 are found in chooseA and chooseB. If the branching structure is
2581 not uniquely defined, ChooseA and ChooseB are chosen randomly
2582 from the possible taxa */
2583 void checkquartet(int A, int B, int C, int I)
2585 int i, j, a, b, taxon[5], leaf[5], ipos;
2586 unsigned char qresult;
2587 int notunique = FALSE;
2589 /* The relationship between leaves and taxa is defined by trueID */
2590 taxon[1] = trueID[A]; /* taxon number */
2591 leaf[1] = A; /* leaf number */
2592 taxon[2] = trueID[B];
2594 taxon[3] = trueID[C];
2596 taxon[4] = trueID[I];
2600 /* Source: Numerical Recipes (PIKSR2.C) */
2601 for (j = 2; j <= 4; j++) {
2605 while (i > 0 && taxon[i] > a) {
2606 taxon[i+1] = taxon[i];
2607 leaf[i+1] = leaf[i];
2614 /* where is leaf I ? */
2616 while (leaf[ipos] != I) ipos++;
2618 /* look at sequence quartet */
2619 qresult = readquartet(taxon[1], taxon[2], taxon[3], taxon[4]);
2621 /* chooseA and chooseB */
2625 /* one single branching structure */
2628 case 1: if (ipos == 1 || ipos == 2) {
2639 case 2: if (ipos == 1 || ipos == 3) {
2650 case 4: if (ipos == 1 || ipos == 4) {
2660 /* two possible branching structures */
2663 case 3: if (randominteger(2)) qresult = 1;
2669 case 5: if (randominteger(2)) qresult = 1;
2675 case 6: if (randominteger(2)) qresult = 2;
2680 /* three possible branching structures */
2683 case 7: qresult = (1 << randominteger(3)); /* 1, 2, or 4 */
2687 default: /* Program error [checkquartet] */
2689 FPRINTF(STDOUTFILE "\n\n\n(%2d)HALT: PLEASE REPORT ERROR K-PARALLEL TO DEVELOPERS (%d,%d,%d,%d) = %ld\n\n\n",
2690 PP_Myid, taxon[1], taxon[2], taxon[3], taxon[4],
2691 quart2num(taxon[1], taxon[2], taxon[3], taxon[4]));
2693 FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR K TO DEVELOPERS\n\n\n");
2697 } while (notunique);