5 * Part of TREE-PUZZLE 5.0 (June 2000)
7 * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer,
8 * M. Vingron, and Arndt von Haeseler
9 * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler
11 * All parts of the source except where indicated are distributed under
12 * the GNU public licence. See http://www.opensource.org for details.
26 /******************************************************************************/
28 /******************************************************************************/
30 /* read ten characters of current line as identifier */
31 void readid(FILE *infp, int t)
35 for (i = 0; i < 26; i++) { /*CZ*/
37 if (ci == EOF || !isprint(ci)) {
38 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (no name for sequence %d)\n\n\n", t+1);
41 Identif[t][i] = (char) ci;
43 /* convert leading blanks in taxon name to underscores */
45 for (i = 25; i > -1; i--) { /*CZ*/
47 if (Identif[t][i] != ' ') flag = TRUE;
49 if (Identif[t][i] == ' ') Identif[t][i] = '_';
52 /* check whether this name is already used */
53 for (i = 0; i < t; i++) { /* compare with all other taxa */
54 flag = TRUE; /* assume identity */
55 for (j = 0; (j < 26) && (flag == TRUE); j++) /*CZ*/
56 if (Identif[t][j] != Identif[i][j])
59 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (multiple occurence of sequence name '");
61 FPRINTF(STDOUTFILE "')\n\n\n");
67 /* read next allowed character */
68 char readnextcharacter(FILE *ifp, int notu, int nsite)
72 /* ignore blanks and control characters except newline */
74 if (fscanf(ifp, "%c", &c) != 1) {
75 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (missing character at position %d in sequence '", nsite + 1);
77 FPRINTF(STDOUTFILE "')\n\n\n");
80 } while (c == ' ' || (iscntrl((int) c) && c != '\n'));
84 /* skip rest of the line */
85 void skiprestofline(FILE* ifp, int notu, int nsite)
89 /* read chars until the first newline */
93 FPRINTF(STDOUTFILE "Unable to proceed (missing newline at position %d in sequence '", nsite + 1);
95 FPRINTF(STDOUTFILE "')\n\n\n");
98 } while ((char) ci != '\n');
101 /* skip control characters and blanks */
102 void skipcntrl(FILE *ifp, int notu, int nsite)
106 /* read over all control characters and blanks */
110 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (missing character at position %d in sequence '", nsite + 1);
111 fputid(STDOUT, notu);
112 FPRINTF(STDOUTFILE "')\n\n\n");
115 } while (iscntrl(ci) || (char) ci == ' ');
116 /* go one character back */
117 if (ungetc(ci, ifp) == EOF) {
118 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (positioning error at position %d in sequence '", nsite + 1);
119 fputid(STDOUT, notu);
120 FPRINTF(STDOUTFILE "')\n\n\n");
125 /* read sequences of one data set */
126 void getseqs(FILE *ifp)
128 int notu, nsite, endofline, linelength, i;
131 seqchars = new_cmatrix(Maxspc, Maxseqc);
132 /* read all characters */
133 nsite = 0; /* next site to be read */
134 while (nsite < Maxseqc) {
135 /* read first taxon */
137 /* go to next true line */
138 skiprestofline(ifp, notu, nsite);
139 skipcntrl(ifp, notu, nsite);
140 if (nsite == 0) readid(ifp, notu);
144 c = readnextcharacter(ifp, notu, nsite + linelength);
145 if (c == '\n') endofline = TRUE;
147 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (invalid character '.' at position ");
148 FPRINTF(STDOUTFILE "%d in first sequence)\n\n\n", nsite + linelength + 1);
150 } else if (nsite + linelength < Maxseqc) {
151 /* change to upper case */
152 seqchars[notu][nsite + linelength] = (char) toupper((int) c);
156 skiprestofline(ifp, notu, nsite + linelength);
158 } while (!endofline);
159 if (linelength == 0) {
160 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (line with length 0 at position %d in sequence '", nsite + 1);
161 fputid(STDOUT, notu);
162 FPRINTF(STDOUTFILE "')\n\n\n");
165 /* read other taxa */
166 for (notu = 1; notu < Maxspc; notu++) {
167 /* go to next true line */
168 if (notu != 1) skiprestofline(ifp, notu, nsite);
169 skipcntrl(ifp, notu, nsite);
170 if (nsite == 0) readid(ifp, notu);
171 for (i = nsite; i < nsite + linelength; i++) {
172 c = readnextcharacter(ifp, notu, i);
173 if (c == '\n') { /* too short */
174 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (line to short at position %d in sequence '", i + 1);
175 fputid(STDOUT, notu);
176 FPRINTF(STDOUTFILE "')\n\n\n");
178 } else if (c == '.') {
179 seqchars[notu][i] = seqchars[0][i];
181 /* change to upper case */
182 seqchars[notu][i] = (char) toupper((int) c);
186 nsite = nsite + linelength;
190 /* initialize identifer array */
195 Identif = new_cmatrix(t, 26); /*CZ*/
196 for (i = 0; i < t; i++)
197 for (j = 0; j < 26; j++) /*CZ*/
201 /* print identifier of specified taxon in full 10 char length */
202 void fputid10(FILE *ofp, int t)
206 for (i = 0; i < 26; i++) fputc(Identif[t][i], ofp); /*CZ*/
209 /* print identifier of specified taxon up to first space */
210 int fputid(FILE *ofp, int t)
215 while (Identif[t][i] != ' ' && i < 26) { /*CZ*/
216 fputc(Identif[t][i], ofp);
222 /* read first line of sequence data set */
223 void getsizesites(FILE *ifp)
225 if (fscanf(ifp, "%d", &Maxspc) != 1) {
226 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (missing number of sequences)\n\n\n");
229 if (fscanf(ifp, "%d", &Maxseqc) != 1) {
230 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (missing number of sites)\n\n\n");
235 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (less than 4 sequences)\n\n\n");
238 if (Maxspc > 8000) { /*CZ*/
239 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (more than 8000 sequences)\n\n\n");
243 FPRINTF(STDOUTFILE "\n\n\nUnable to proceed (no sequence sites)\n\n\n");
246 Maxbrnch = 2*Maxspc - 3;
249 /* read one data set - PHYLIP interleaved */
250 void getdataset(FILE *ifp)
256 /* guess data type */
259 uli numnucs, numchars, numbins;
263 /* count A, C, G, T, U, N */
267 for (notu = 0; notu < Maxspc; notu++)
268 for (nsite = 0; nsite < Maxseqc; nsite++) {
269 c = seqchars[notu][nsite];
270 if (c == 'A' || c == 'C' || c == 'G' ||
271 c == 'T' || c == 'U' || c == 'N') numnucs++;
272 if (c != '-' && c != '?') numchars++;
273 if (c == '0' || c == '1') numbins++;
275 if (numchars == 0) numchars = 1;
276 /* more than 85 % frequency means nucleotide data */
277 if ((double) numnucs / (double) numchars > 0.85) return 0;
278 else if ((double) numbins / (double) numchars > 0.2) return 2;
282 /* translate characters into format used by ML engine */
283 void translatedataset()
290 /* determine Maxsite - number of ML sites per taxon */
291 if (data_optn == 0 && SH_optn) {
293 Maxsite = Maxseqc / 3;
295 Maxsite = Maxseqc / 2; /* assume doublets */
299 if (data_optn == 0 && (Maxsite % 3) == 0 && !SH_optn) {
300 if (codon_optn == 1 || codon_optn == 2 || codon_optn == 3)
301 Maxsite = Maxsite / 3; /* only one of the three codon positions */
303 Maxsite = 2*(Maxsite / 3); /* 1st + 2nd codon positions */
307 if (Seqchar != NULL) free_cmatrix(Seqchar);
308 Seqchar = new_cmatrix(Maxspc, Maxsite);
311 if (data_optn == 0 && SH_optn)
312 code = new_cvector(2);
314 code = new_cvector(1);
316 /* decode characters */
317 if (data_optn == 0 && SH_optn) { /* SH doublets */
319 for (notu = 0; notu < Maxspc; notu++) {
320 for (sn = 0; sn < Maxsite; sn++) {
321 for (co = 0; co < 2; co++) {
323 c = seqchars[notu][sn*3 + co];
325 c = seqchars[notu][sn*2 + co];
328 Seqchar[notu][sn] = code2int(code);
332 } else if (!(data_optn == 0 && (Maxseqc % 3) == 0)) { /* use all */
334 for (notu = 0; notu < Maxspc; notu++) {
335 for (sn = 0; sn < Maxsite; sn++) {
336 code[0] = seqchars[notu][sn];
337 Seqchar[notu][sn] = code2int(code);
341 } else { /* codons */
343 for (notu = 0; notu < Maxspc; notu++) {
344 for (sn = 0; sn < Maxsite; sn++) {
345 if (codon_optn == 1 || codon_optn == 2 || codon_optn == 3)
346 code[0] = seqchars[notu][sn*3+codon_optn-1];
347 else if (codon_optn == 4) {
349 code[0] = seqchars[notu][(sn/2)*3];
351 code[0] = seqchars[notu][((sn-1)/2)*3+1];
353 code[0] = seqchars[notu][sn];
354 Seqchar[notu][sn] = code2int(code);
362 /* estimate mean base frequencies from translated data set */
363 void estimatebasefreqs()
368 tpmradix = gettpmradix();
370 if (Freqtpm != NULL) free_dvector(Freqtpm);
371 Freqtpm = new_dvector(tpmradix);
373 if (Basecomp != NULL) free_imatrix(Basecomp);
374 Basecomp = new_imatrix(Maxspc, tpmradix);
376 gene = (uli *) malloc((unsigned) ((tpmradix + 1) * sizeof(uli)));
377 if (gene == NULL) maerror("gene in estimatebasefreqs");
379 for (i = 0; i < tpmradix + 1; i++) gene[i] = 0;
380 for (i = 0; i < Maxspc; i++)
381 for (j = 0; j < tpmradix; j++) Basecomp[i][j] = 0;
382 for (i = 0; i < Maxspc; i++)
383 for (j = 0; j < Maxsite; j++) {
384 gene[(int) Seqchar[i][j]]++;
385 if (Seqchar[i][j] != tpmradix) Basecomp[i][(int) Seqchar[i][j]]++;
388 all = Maxspc * Maxsite - gene[tpmradix];
389 if (all != 0) { /* normal case */
390 for (i = 0; i < tpmradix; i++)
391 Freqtpm[i] = (double) gene[i] / (double) all;
392 } else { /* pathological case with no unique character in data set */
393 for (i = 0; i < tpmradix; i++)
394 Freqtpm[i] = 1.0 / (double) tpmradix;
402 /* guess model of substitution */
405 double c1, c2, c3, c4, c5, c6;
414 blosum62_optn = FALSE;
423 if (data_optn == 1) { /* amino acids */
425 /* chi2 fit to amino acid frequencies */
428 a = new_dmatrix(20,20);
429 /* chi2 distance Dayhoff */
432 for (i = 0; i < 20; i++)
433 c1 = c1 + (Freqtpm[i]-f[i])*(Freqtpm[i]-f[i]);
434 /* chi2 distance JTT */
437 for (i = 0; i < 20; i++)
438 c2 = c2 + (Freqtpm[i]-f[i])*(Freqtpm[i]-f[i]);
439 /* chi2 distance mtREV */
442 for (i = 0; i < 20; i++)
443 c3 = c3 + (Freqtpm[i]-f[i])*(Freqtpm[i]-f[i]);
444 /* chi2 distance VT */
447 for (i = 0; i < 20; i++)
448 c4 = c4 + (Freqtpm[i]-f[i])*(Freqtpm[i]-f[i]);
449 /* chi2 distance WAG */
452 for (i = 0; i < 20; i++)
453 c5 = c5 + (Freqtpm[i]-f[i])*(Freqtpm[i]-f[i]);
454 /* chi2 distance cpREV */
457 for (i = 0; i < 20; i++)
458 c6 = c6 + (Freqtpm[i]-f[i])*(Freqtpm[i]-f[i]);
464 if ((c1 < c2) && (c1 < c3) && (c1 < c4) && (c1 < c5)) {
472 FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on nuclear DNA)\n");
474 if ((c2 < c3) && (c2 < c4) && (c2 < c5)) {
482 FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on nuclear DNA)\n");
484 if ((c3 < c4) && (c3 < c5)) {
492 FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on mtDNA)\n");
502 FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on nuclear DNA)\n");
511 FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on nuclear DNA)\n");
512 } /* if c4 else c5 */
513 } /* if c3 else c4 */
519 if ((c1 < c2) && (c1 < c3) && (c1 < c4) && (c1 < c5) && (c1 < c6)) {
527 FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on nuclear DNA)\n");
529 if ((c2 < c3) && (c2 < c4) && (c2 < c5) && (c2 < c6)) {
537 FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on nuclear DNA)\n");
539 if ((c3 < c4) && (c3 < c5) && (c3 < c6)) {
547 FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on mtDNA)\n");
549 if ((c4 < c5) && (c4 < c6)) {
557 FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on nuclear DNA)\n");
567 FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on nuclear DNA)\n");
577 FPRINTF(STDOUTFILE "(consists very likely of amino acids encoded on cpDNA)\n");
578 } /* if c5 else c6 */
579 } /* if c4 else c5 */
580 } /* if c3 else c4 */
585 } else if (data_optn == 0) {
586 FPRINTF(STDOUTFILE "(consists very likely of nucleotides)\n");
588 FPRINTF(STDOUTFILE "(consists very likely of binary state data)\n");
593 /******************************************************************************/
594 /* functions for representing and building puzzling step trees */
595 /******************************************************************************/
597 /* initialize tree with the following starting configuration
609 /* allocate the memory for the whole tree */
611 /* allocate memory for vector with all the edges of the tree */
612 edge = (ONEEDGE *) calloc(Maxbrnch, sizeof(ONEEDGE) );
613 if (edge == NULL) maerror("edge in inittree");
615 /* allocate memory for vector with edge numbers of leaves */
616 edgeofleaf = (int *) calloc(Maxspc, sizeof(int) );
617 if (edgeofleaf == NULL) maerror("edgeofleaf in inittree");
619 /* allocate memory for all the edges the edge map */
620 for (i = 0; i < Maxbrnch; i++) {
621 edge[i].edgemap = (int *) calloc(Maxbrnch, sizeof(int) );
622 if (edge[i].edgemap == NULL) maerror("edgemap in inittree");
625 /* number all edges */
626 for (i = 0; i < Maxbrnch; i++) edge[i].numedge = i;
628 /* initialize tree */
634 (edge[0].edgemap)[0] = 0; /* you are on the right edge */
635 (edge[0].edgemap)[1] = 4; /* go down left for leaf 1 */
636 (edge[0].edgemap)[2] = 5; /* go down right for leaf 2 */
637 (edge[1].edgemap)[0] = 1; /* go up for leaf 0 */
638 (edge[1].edgemap)[1] = 0; /* you are on the right edge */
639 (edge[1].edgemap)[2] = 3; /* go up/down right for leaf 2 */
640 (edge[2].edgemap)[0] = 1; /* go up for leaf 0 */
641 (edge[2].edgemap)[1] = 2; /* go up/down left for leaf 1 */
642 (edge[2].edgemap)[2] = 0; /* you are on the right edge */
644 /* interconnection */
646 edge[0].downleft = &edge[1];
647 edge[0].downright = &edge[2];
648 edge[1].up = &edge[0];
649 edge[1].downleft = NULL;
650 edge[1].downright = NULL;
651 edge[2].up = &edge[0];
652 edge[2].downleft = NULL;
653 edge[2].downright = NULL;
655 /* edges of leaves */
661 /* add next leaf on the specified edge */
662 void addnextleaf(int dockedge)
666 if (dockedge >= nextedge) {
667 /* Trying to add leaf nextleaf to nonexisting edge dockedge */
668 FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR F TO DEVELOPERS\n\n\n");
672 if (nextleaf >= Maxspc) {
673 /* Trying to add leaf nextleaf to a tree with Maxspc leaves */
674 FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR G TO DEVELOPERS\n\n\n");
678 /* necessary change in edgeofleaf if dockedge == edgeofleaf[0] */
679 if (edgeofleaf[0] == dockedge) edgeofleaf[0] = nextedge;
681 /* adding nextedge to the tree */
682 edge[nextedge].up = edge[dockedge].up;
683 edge[nextedge].downleft = &edge[dockedge];
684 edge[nextedge].downright = &edge[nextedge+1];
685 edge[dockedge].up = &edge[nextedge];
687 if (edge[nextedge].up != NULL) {
688 if ( ((edge[nextedge].up)->downleft) == &edge[dockedge] )
689 (edge[nextedge].up)->downleft = &edge[nextedge];
691 (edge[nextedge].up)->downright = &edge[nextedge];
694 /* adding nextedge + 1 to the tree */
695 edge[nextedge+1].up = &edge[nextedge];
696 edge[nextedge+1].downleft = NULL;
697 edge[nextedge+1].downright = NULL;
698 edgeofleaf[nextleaf] = nextedge+1;
700 /* the two new edges get info about the old edges */
702 for (i = 0; i < nextedge; i++) {
703 switch ( (edge[dockedge].edgemap)[i] ) {
705 /* down right changes to down left */
706 case 5: (edge[nextedge].edgemap)[i] = 4;
709 /* null changes to down left */
710 case 0: (edge[nextedge].edgemap)[i] = 4;
713 default: (edge[nextedge].edgemap)[i] =
714 (edge[dockedge].edgemap)[i];
720 for (i = 0; i < nextedge; i++) {
721 switch ( (edge[dockedge].edgemap)[i] ) {
723 /* up/down left changes to up */
724 case 2: (edge[nextedge+1].edgemap)[i] = 1;
727 /* up/down right changes to up */
728 case 3: (edge[nextedge+1].edgemap)[i] = 1;
731 /* down left changes to up/down left */
732 case 4: (edge[nextedge+1].edgemap)[i] = 2;
735 /* down right changes to up/down left */
736 case 5: (edge[nextedge+1].edgemap)[i] = 2;
739 /* null changes to up/down left */
740 case 0: (edge[nextedge+1].edgemap)[i] = 2;
744 default: (edge[nextedge+1].edgemap)[i] =
745 (edge[dockedge].edgemap)[i];
751 for (i = 0; i < nextedge; i++) {
752 switch ( (edge[dockedge].edgemap)[i] ) {
754 /* up/down right changes to up */
755 case 3: (edge[dockedge].edgemap)[i] = 1;
758 /* up/down left changes to up */
759 case 2: (edge[dockedge].edgemap)[i] = 1;
766 /* all edgemaps are updated for the two new edges */
768 (edge[nextedge].edgemap)[nextedge] = 0;
769 (edge[nextedge].edgemap)[nextedge+1] = 5; /* down right */
772 (edge[nextedge+1].edgemap)[nextedge] = 1; /* up */
773 (edge[nextedge+1].edgemap)[nextedge+1] = 0;
775 /* all other edges */
776 for (i = 0; i < nextedge; i++) {
777 (edge[i].edgemap)[nextedge] = (edge[i].edgemap)[dockedge];
778 (edge[i].edgemap)[nextedge+1] = (edge[i].edgemap)[dockedge];
781 /* an extra for dockedge */
782 (edge[dockedge].edgemap)[nextedge] = 1; /* up */
783 (edge[dockedge].edgemap)[nextedge+1] = 3; /* up/down right */
786 nextedge = nextedge + 2;
790 /* free memory (to be called after inittree) */
795 for (i = 0; i < 2 * Maxspc - 3; i++) free(edge[i].edgemap);
800 /* writes OTU sitting on edge ed */
801 void writeOTU(FILE *outfp, int ed)
805 /* test whether we are on a leaf */
806 if (edge[ed].downright == NULL && edge[ed].downleft == NULL) {
807 for (i = 1; i < nextleaf; i++) {
808 if (edgeofleaf[i] == ed) { /* i is the leaf of ed */
809 column += fputid(outfp, trueID[i]);
815 /* we are NOT on a leaf */
818 writeOTU(outfp, edge[ed].downleft->numedge);
824 fprintf(outfp, "\n ");
826 writeOTU(outfp, edge[ed].downright->numedge);
832 void writetree(FILE *outfp)
836 column += fputid(outfp, trueID[0]) + 3;
838 writeOTU(outfp, edge[edgeofleaf[0]].downleft->numedge);
842 writeOTU(outfp, edge[edgeofleaf[0]].downright->numedge);
843 fprintf(outfp, ");\n");
847 /* clear all edgeinfos */
852 for (i = 0; i < nextedge; i++)
853 edge[i].edgeinfo = 0;
854 } /* resetedgeinfo */
856 /* increment all edgeinfo between leaf A and B */
857 void incrementedgeinfo(int A, int B)
859 int curredge, finaledge, nextstep;
863 finaledge = edgeofleaf[B];
865 curredge = edgeofleaf[A];
866 edge[curredge].edgeinfo = edge[curredge].edgeinfo + 1;
868 while (curredge != finaledge) {
869 nextstep = (edge[curredge].edgemap)[finaledge];
873 case 1: curredge = (edge[curredge].up)->numedge;
877 case 2: curredge = ((edge[curredge].up)->downleft)->numedge;
881 case 3: curredge = ((edge[curredge].up)->downright)->numedge;
885 case 4: curredge = (edge[curredge].downleft)->numedge;
889 case 5: curredge = (edge[curredge].downright)->numedge;
893 edge[curredge].edgeinfo = edge[curredge].edgeinfo + 1;
895 } /* incrementedgeinfo */
897 /* checks which edge has the lowest edgeinfo
898 if there are several edges with the same lowest edgeinfo,
899 one of them will be selected randomly */
900 void minimumedgeinfo()
902 int i, k, howmany, randomnum;
906 mininfo = edge[0].edgeinfo;
907 for (i = 1; i < nextedge; i++)
908 if (edge[i].edgeinfo <= mininfo) {
909 if (edge[i].edgeinfo == mininfo) {
913 mininfo = edge[i].edgeinfo;
918 if (howmany > 1) { /* draw random edge */
919 randomnum = randominteger(howmany) + 1; /* 1 to howmany */
921 for (k = 0; k < randomnum; k++) {
924 } while (edge[i].edgeinfo != mininfo);
928 } /* minimumedgeinfo */
933 /*******************************************/
935 /*******************************************/
937 /* compute address of the 4 int (sort key) in the 4 int node */
938 int ct_sortkeyaddr(int addr)
949 /* compute address of the next edge pointer in a 4 int node (0->1->2->0) */
950 int ct_nextedgeaddr(int addr)
954 if ( a == 2 ) { res = addr - 2; }
955 else { res = addr + 1; }
962 /* compute address of 1st edge of a 4 int node from node number */
963 int ct_1stedge(int node)
973 /* compute address of 2nd edge of a 4 int node from node number */
974 int ct_2ndedge(int node)
984 /* compute address of 3rd edge of a 4 int node from node number */
985 int ct_3rdedge(int node)
995 /* check whether node 'node' is a leaf (2nd/3rd edge pointer = -1) */
996 int ct_isleaf(int node, int *ctree)
998 return (ctree[ct_3rdedge(node)] < 0);
1004 /* compute node number of 4 int node from an edge addr. */
1005 int ct_addr2node(int addr)
1009 res = (int) ((addr - a) / 4);
1016 /* print graph pointers for checking */
1017 void printctree(int *ctree)
1020 for (n=0; n < 2*Maxspc; n++) {
1021 printf("n[%3d] = (%3d.%2d, %3d.%2d, %3d.%2d | %3d)\n", n,
1022 (int) ctree[ct_1stedge(n)]/4,
1023 (int) ctree[ct_1stedge(n)]%4,
1024 (int) ctree[ct_2ndedge(n)]/4,
1025 (int) ctree[ct_2ndedge(n)]%4,
1026 (int) ctree[ct_3rdedge(n)]/4,
1027 (int) ctree[ct_3rdedge(n)]%4,
1028 ctree[ct_3rdedge(n)+1]);
1036 /* allocate memory for ctree 3 ints pointer plus 1 check byte */
1042 snodes = (int *) malloc(4 * 2 * Maxspc * sizeof(int));
1043 if (snodes == NULL) maerror("snodes in copytree");
1045 for (n=0; n<(4 * 2 * Maxspc); n++) {
1054 /* free memory of a tree for sorting */
1055 void freectree(int **snodes)
1064 /* copy subtree recursively */
1065 void copyOTU(int *ctree, /* tree array struct */
1066 int *ct_nextnode, /* next free node */
1067 int ct_curredge, /* currende edge to add subtree */
1068 int *ct_nextleaf, /* next free leaf (0-maxspc) */
1069 int ed) /* edge in puzzling step tree */
1071 int i, nextcurredge;
1073 /* test whether we are on a leaf */
1074 if (edge[ed].downright == NULL && edge[ed].downleft == NULL) {
1075 for (i = 1; i < nextleaf; i++) {
1076 if (edgeofleaf[i] == ed) { /* i is the leaf of ed */
1077 nextcurredge = ct_1stedge(*ct_nextleaf);
1078 ctree[ct_curredge] = nextcurredge;
1079 ctree[nextcurredge] = ct_curredge;
1080 ctree[ct_sortkeyaddr(nextcurredge)] = trueID[i];
1087 /* we are NOT on a leaf */
1088 nextcurredge = ct_1stedge(*ct_nextnode);
1089 ctree[ct_curredge] = nextcurredge;
1090 ctree[nextcurredge] = ct_curredge;
1092 nextcurredge = ct_nextedgeaddr(nextcurredge);
1093 copyOTU(ctree, ct_nextnode, nextcurredge,
1094 ct_nextleaf, edge[ed].downleft->numedge);
1096 nextcurredge = ct_nextedgeaddr(nextcurredge);
1097 copyOTU(ctree, ct_nextnode, nextcurredge,
1098 ct_nextleaf, edge[ed].downright->numedge);
1104 /* copy treestructure to sorting structure */
1105 void copytree(int *ctree)
1111 ct_nextnode = Maxspc;
1112 ct_curredge = ct_1stedge(ct_nextnode);
1115 ctree[ct_1stedge(0)] = ct_curredge;
1116 ctree[ct_curredge] = ct_1stedge(0);
1117 ctree[ct_sortkeyaddr(0)] = trueID[0];
1121 ct_curredge = ct_nextedgeaddr(ct_curredge);
1122 copyOTU(ctree, &ct_nextnode, ct_curredge,
1123 &ct_nextleaf, edge[edgeofleaf[0]].downleft->numedge);
1125 ct_curredge = ct_nextedgeaddr(ct_curredge);
1126 copyOTU(ctree, &ct_nextnode, ct_curredge,
1127 &ct_nextleaf, edge[edgeofleaf[0]].downright->numedge);
1133 /* sort subtree from edge recursively by indices */
1134 int sortOTU(int edge, int *ctree)
1140 if (ctree[ct_2ndedge((int) (edge / 4))] < 0)
1141 return ctree[ct_sortkeyaddr(edge)];
1143 edge1 = ctree[ct_nextedgeaddr(edge)];
1144 edge2 = ctree[ct_nextedgeaddr(ct_nextedgeaddr(edge))];
1146 /* printf ("visiting [%5d] -> [%5d], [%5d]\n", edge, edge1, edge2); */
1147 /* printf ("visiting [%2d.%2d] -> [%2d.%2d], [%2d.%2d]\n",
1148 (int)(edge/4), edge%4, (int)(edge1/4), edge1%4,
1149 (int)(edge2/4), edge2%4); */
1151 key1 = sortOTU(edge1, ctree);
1152 key2 = sortOTU(edge2, ctree);
1155 tempedge = ctree[ctree[edge1]];
1156 ctree[ctree[edge1]] = ctree[ctree[edge2]];
1157 ctree[ctree[edge2]] = tempedge;
1158 tempedge = ctree[edge1];
1159 ctree[edge1] = ctree[edge2];
1160 ctree[edge2] = tempedge;
1161 ctree[ct_sortkeyaddr(edge)] = key2;
1164 ctree[ct_sortkeyaddr(edge)] = key1;
1166 return ctree[ct_sortkeyaddr(edge)];
1172 /* sort ctree recursively by indices */
1173 int sortctree(int *ctree)
1175 int n, startnode=-1;
1176 for(n=0; n<Maxspc; n++) {
1177 if (ctree[ct_sortkeyaddr(n*4)] == 0)
1180 sortOTU(ctree[startnode * 4], ctree);
1187 /* print recursively subtree of edge of sorted tree ctree */
1188 void printfsortOTU(int edge, int *ctree)
1192 if (ctree[ct_2ndedge((int) (edge / 4))] < 0) {
1193 printf("%d", ctree[ct_sortkeyaddr(edge)]);
1197 edge1 = ctree[ct_nextedgeaddr(edge)];
1198 edge2 = ctree[ct_nextedgeaddr(ct_nextedgeaddr(edge))];
1201 printfsortOTU(edge1, ctree);
1203 printfsortOTU(edge2, ctree);
1211 /* print recursively sorted tree ctree */
1212 int printfsortctree(int *ctree)
1214 int n, startnode=-1;
1215 for(n=0; n<Maxspc; n++) {
1216 if (ctree[ct_sortkeyaddr(n*4)] == 0)
1219 printf ("(%d,", ctree[ct_sortkeyaddr(startnode*4)]);
1220 printfsortOTU(ctree[startnode * 4], ctree);
1228 /* print recursively subtree of edge of sorted tree ctree to string */
1229 void sprintfOTU(char *str, int *len, int edge, int *ctree)
1233 if (ctree[ct_2ndedge((int) (edge / 4))] < 0) {
1234 *len+=sprintf(&(str[*len]), "%d", ctree[ct_sortkeyaddr(edge)]);
1238 edge1 = ctree[ct_nextedgeaddr(edge)];
1239 edge2 = ctree[ct_nextedgeaddr(ct_nextedgeaddr(edge))];
1241 sprintf(&(str[*len]), "(");
1243 sprintfOTU(str, len, edge1, ctree);
1244 sprintf(&(str[*len]), ",");
1246 sprintfOTU(str, len, edge2, ctree);
1247 sprintf(&(str[*len]), ")");
1253 /* print recursively sorted tree ctree to string */
1254 char *sprintfctree(int *ctree, int strglen)
1261 treestr = (char *) malloc(strglen * sizeof(char));
1263 for(n=0; n<Maxspc; n++) {
1264 if (ctree[ct_sortkeyaddr(n*4)] == 0)
1267 len+=sprintf (&(tmpptr[len]), "(%d,", ctree[ct_sortkeyaddr(startnode*4)]);
1268 sprintfOTU(tmpptr, &len, ctree[startnode * 4], ctree);
1269 len+=sprintf (&(tmpptr[len]), ");");
1277 /***********************************************/
1278 /* establish and handle a list of sorted trees */
1279 /***********************************************/
1283 /* initialize structure */
1284 treelistitemtype *inittreelist(int *treenum)
1293 /* malloc new tree list item */
1294 treelistitemtype *gettreelistitem()
1296 treelistitemtype *tmpptr;
1297 tmpptr = (treelistitemtype *)malloc(sizeof(treelistitemtype));
1298 if (tmpptr == NULL) maerror("item of intermediate tree stuctures");
1299 (*tmpptr).pred = NULL;
1300 (*tmpptr).succ = NULL;
1301 (*tmpptr).tree = NULL;
1302 (*tmpptr).count = 0;
1303 (*tmpptr).idx = itemcount++;
1309 /* free whole tree list */
1310 void freetreelist(treelistitemtype **list,
1314 treelistitemtype *current;
1315 treelistitemtype *next;
1317 while (current != NULL) {
1318 next = (*current).succ;
1319 if ((*current).tree != NULL) {
1320 free ((*current).tree);
1321 (*current).tree = NULL;
1329 } /* freetreelist */
1334 /* add tree to the tree list */
1335 treelistitemtype *addtree2list(char **tree, /* sorted tree string */
1336 int numtrees, /* how many occurred, e.g. in parallel */
1337 treelistitemtype **list, /* addr. of tree list */
1341 treelistitemtype *tmpptr = NULL;
1342 treelistitemtype *newptr = NULL;
1346 if ((*list == NULL) || (numitems == 0)) {
1347 newptr = gettreelistitem();
1348 (*newptr).tree = *tree;
1350 (*newptr).id = *numitems;
1351 (*newptr).count = numtrees;
1358 result = strcmp( (*tmpptr).tree, *tree);
1360 free(*tree); *tree = NULL;
1361 (*tmpptr).count += numtrees;
1362 *numsum += numtrees;
1365 } else { if (result < 0) {
1366 if ((*tmpptr).succ != NULL)
1367 tmpptr = (*tmpptr).succ;
1369 newptr = gettreelistitem();
1370 (*newptr).tree = *tree;
1372 (*newptr).id = *numitems;
1373 (*newptr).count = numtrees;
1374 (*newptr).pred = tmpptr;
1375 (*tmpptr).succ = newptr;
1377 *numsum += numtrees;
1380 } else { /* result < 0 */
1381 newptr = gettreelistitem();
1382 (*newptr).tree = *tree;
1384 (*newptr).id = *numitems;
1385 (*newptr).count = numtrees;
1386 (*newptr).succ = tmpptr;
1387 (*newptr).pred = (*tmpptr).pred;
1388 (*tmpptr).pred = newptr;
1389 *numsum += numtrees;
1391 if ((*newptr).pred != NULL) {
1392 (*(*newptr).pred).succ = newptr;
1398 } /* end if result < 0 */
1399 } /* end if result != 0 */
1400 } /* while searching in list */
1401 } /* if list empty, else */
1403 } /* addtree2list */
1408 /* resort list of trees by number of occurences for output */
1409 void sortbynum(treelistitemtype *list, treelistitemtype **sortlist)
1411 treelistitemtype *tmpptr = NULL;
1412 treelistitemtype *curr = NULL;
1413 treelistitemtype *next = NULL;
1416 if (list == NULL) fprintf(stderr, "Grrrrrrrrr>>>>\n");
1419 while (tmpptr != NULL) {
1420 (*tmpptr).sortnext = (*tmpptr).succ;
1421 (*tmpptr).sortlast = (*tmpptr).pred;
1422 tmpptr = (*tmpptr).succ;
1425 while (xchange > 0) {
1428 if (curr == NULL) fprintf(stderr, "Grrrrrrrrr>>>>\n");
1429 while((*curr).sortnext != NULL) {
1430 next = (*curr).sortnext;
1431 if ((*curr).count >= (*next).count)
1432 curr = (*curr).sortnext;
1434 if ((*curr).sortlast != NULL)
1435 (*((*curr).sortlast)).sortnext = next;
1436 if (*sortlist == curr)
1438 (*next).sortlast = (*curr).sortlast;
1440 if ((*next).sortnext != NULL)
1441 (*((*next).sortnext)).sortlast = curr;
1442 (*curr).sortnext = (*next).sortnext;
1444 (*curr).sortlast = next;
1445 (*next).sortnext = curr;
1456 /* print puzzling step tree stuctures for checking */
1457 void printfpstrees(treelistitemtype *list)
1460 treelistitemtype *tmpptr = NULL;
1463 while (tmpptr != NULL) {
1464 printf ("%c[%2d] %5d %s\n", ch, (*tmpptr).idx, (*tmpptr).count, (*tmpptr).tree);
1465 tmpptr = (*tmpptr).succ;
1472 /* print sorted puzzling step tree stucture with names */
1473 void fprintffullpstree(FILE *outf, char *treestr)
1478 for(n=0; treestr[n] != '\0'; n++){
1479 while(isdigit((int)treestr[n])){
1480 idnum = (10 * idnum) + ((int)treestr[n]-48);
1488 (void)fputid(outf, idnum);
1495 fprintf(outf, "%c", treestr[n]);
1502 /* print sorted puzzling step tree stuctures with names */
1503 void fprintfsortedpstrees(FILE *output,
1504 treelistitemtype *list, /* tree list */
1505 int itemnum, /* order number */
1506 int itemsum, /* number of trees */
1507 int comment, /* with statistics, or puzzle report ? */
1508 float cutoff) /* cutoff percentage */
1510 treelistitemtype *tmpptr = NULL;
1511 treelistitemtype *slist = NULL;
1515 if (list == NULL) fprintf(stderr, "Grrrrrrrrr>>>>\n");
1516 sortbynum(list, &slist);
1519 while (tmpptr != NULL) {
1520 percent = (float)(100.0 * (*tmpptr).count / itemsum);
1521 if ((cutoff == 0.0) || (cutoff <= percent)) {
1523 fprintf (output, "[ %d. %d %.2f %d %d %d ]", num++, (*tmpptr).count, percent, (*tmpptr).id, itemnum, itemsum);
1526 fprintf (output, "\n");
1527 fprintf (output, "The following tree(s) occured in more than %.2f%% of the %d puzzling steps.\n", cutoff, itemsum);
1528 fprintf (output, "The trees are orderd descending by the number of occurences.\n");
1529 fprintf (output, "\n");
1530 fprintf (output, "\n occurences ID Phylip tree\n");
1532 fprintf (output, "%2d. %5d %6.2f%% %5d ", num++, (*tmpptr).count, percent, (*tmpptr).id);
1534 fprintffullpstree(output, (*tmpptr).tree);
1535 fprintf (output, "\n");
1537 tmpptr = (*tmpptr).sortnext;
1541 fprintf (output, "\n");
1543 case 1: fprintf (output, "There were no tree topologies (out of %d) occuring with a percentage >= %.2f%% of the %d puzzling steps.\n", itemnum, cutoff, itemsum); break;
1544 case 2: fprintf (output, "There was one tree topology (out of %d) occuring with a percentage >= %.2f%%.\n", itemnum, cutoff); break;
1545 default: fprintf (output, "There were %d tree topologies (out of %d) occuring with a percentage >= %.2f%%.\n", num-1, itemnum, cutoff); break;
1547 fprintf (output, "\n");
1548 fprintf (output, "\n");
1551 } /* fprintfsortedpstrees */
1555 /* print sorted tree topologies for checking */
1556 void printfsortedpstrees(treelistitemtype *list)
1558 treelistitemtype *tmpptr = NULL;
1559 treelistitemtype *slist = NULL;
1561 sortbynum(list, &slist);
1564 while (tmpptr != NULL) {
1565 printf ("[%2d] %5d %s\n", (*tmpptr).idx, (*tmpptr).count, (*tmpptr).tree);
1566 tmpptr = (*tmpptr).sortnext;
1568 } /* printfsortedpstrees */
1571 /*******************************************/
1572 /* end of tree sorting */
1573 /*******************************************/
1577 /******************************************************************************/
1578 /* functions for computing the consensus tree */
1579 /******************************************************************************/
1581 /* prepare for consensus tree analysis */
1582 void initconsensus()
1585 biparts = new_cmatrix(Maxspc-3, Maxspc);
1586 # endif /* PARALLEL */
1588 if (Maxspc % 32 == 0)
1589 splitlength = Maxspc/32;
1590 else splitlength = (Maxspc + 32 - (Maxspc % 32))/32;
1591 numbiparts = 0; /* no pattern stored so far */
1592 maxbiparts = 0; /* no memory reserved so far */
1594 splitpatterns = NULL;
1596 splitcomp = (uli *) malloc(splitlength * sizeof(uli) );
1597 if (splitcomp == NULL) maerror("splitcomp in initconsensus");
1600 /* prototype needed for recursive function */
1601 void makepart(int i, int curribrnch);
1603 /* recursive function to get bipartitions */
1604 void makepart(int i, int curribrnch)
1608 if ( edge[i].downright == NULL ||
1609 edge[i].downleft == NULL) { /* if i is leaf */
1611 /* check out what leaf j sits on this edge i */
1612 for (j = 1; j < Maxspc; j++) {
1613 if (edgeofleaf[j] == i) {
1614 biparts[curribrnch][trueID[j]] = '*';
1618 } else { /* still on inner branch */
1619 makepart(edge[i].downleft->numedge, curribrnch);
1620 makepart(edge[i].downright->numedge, curribrnch);
1624 /* compute bipartitions of tree of current puzzling step */
1625 void computebiparts()
1627 int i, j, curribrnch;
1631 for (i = 0; i < Maxspc - 3; i++)
1632 for (j = 0; j < Maxspc; j++)
1633 biparts[i][j] = '.';
1635 for (i = 0; i < Maxbrnch; i++) {
1636 if (!( edgeofleaf[0] == i ||
1637 edge[i].downright == NULL ||
1638 edge[i].downleft == NULL) ) { /* check all inner branches */
1640 makepart(i, curribrnch);
1642 /* make sure that the root is always a '*' */
1643 if (biparts[curribrnch][outgroup] == '.') {
1644 for (j = 0; j < Maxspc; j++) {
1645 if (biparts[curribrnch][j] == '.')
1646 biparts[curribrnch][j] = '*';
1648 biparts[curribrnch][j] = '.';
1655 /* print out the bipartition n of all different splitpatterns */
1656 void printsplit(FILE *fp, uli n)
1662 for (i = 0; i < splitlength; i++) {
1663 z = splitpatterns[n*splitlength + i];
1664 for (j = 0; j < 32 && col < Maxspc; j++) {
1665 if (col % 10 == 0 && col != 0) fprintf(fp, " ");
1666 if (z & 1) fprintf(fp, ".");
1667 else fprintf(fp, "*");
1674 /* make new entries for new different bipartitions and count frequencies */
1675 void makenewsplitentries()
1677 int i, j, bpc, identical, idflag, bpsize;
1678 uli nextentry, obpc;
1680 /* where the next entry would be in splitpatterns */
1681 nextentry = numbiparts;
1683 for (bpc = 0; bpc < Maxspc - 3; bpc++) { /* for every new bipartition */
1684 /* convert bipartition into a more compact format */
1686 for (i = 0; i < splitlength; i++) {
1688 for (j = 0; j < 32; j++) {
1689 splitcomp[i] = splitcomp[i] >> 1;
1690 if (i*32 + j < Maxspc)
1691 if (biparts[bpc][i*32 + j] == '.') {
1692 /* set highest bit */
1693 splitcomp[i] = (splitcomp[i] | 2147483648UL);
1694 bpsize++; /* count the '.' */
1698 /* compare to the *old* patterns */
1700 for (obpc = 0; (obpc < numbiparts) && (!identical); obpc++) {
1701 /* compare first partition size */
1702 if (splitsizes[obpc] == bpsize) idflag = TRUE;
1703 else idflag = FALSE;
1704 /* if size is identical compare whole partition */
1705 for (i = 0; (i < splitlength) && idflag; i++)
1706 if (splitcomp[i] != splitpatterns[obpc*splitlength + i])
1708 if (idflag) identical = TRUE;
1710 if (identical) { /* if identical increase frequency */
1711 splitfreqs[2*(obpc-1)]++;
1712 } else { /* create new entry */
1713 if (nextentry == maxbiparts) { /* reserve more memory */
1714 maxbiparts = maxbiparts + 2*Maxspc;
1715 splitfreqs = (uli *) myrealloc(splitfreqs,
1716 2*maxbiparts * sizeof(uli) );
1717 /* 2x: splitfreqs contains also an index (sorting!) */
1718 if (splitfreqs == NULL) maerror("splitfreqs in makenewsplitentries");
1719 splitpatterns = (uli *) myrealloc(splitpatterns,
1720 splitlength*maxbiparts * sizeof(uli) );
1721 if (splitpatterns == NULL) maerror("splitpatterns in makenewsplitentries");
1722 splitsizes = (int *) myrealloc(splitsizes,
1723 maxbiparts * sizeof(int) );
1724 if (splitsizes == NULL) maerror("splitsizes in makenewsplitentries");
1726 splitfreqs[2*nextentry] = 1; /* frequency */
1727 splitfreqs[2*nextentry+1] = nextentry; /* index for sorting */
1728 for (i = 0; i < splitlength; i++)
1729 splitpatterns[nextentry*splitlength + i] = splitcomp[i];
1730 splitsizes[nextentry] = bpsize;
1734 numbiparts = nextentry;
1739 - every entry in consbiparts is one node of the consensus tree
1740 - for each node one has to know which taxa and which other nodes
1741 are *directly* descending from it
1742 - for every taxon/node number there is a flag that shows
1743 whether it descends from the node or not
1744 - '0' means that neither a taxon nor another node with the
1745 corresponding number decends from the node
1746 '1' means that the corresponding taxon descends from the node
1747 '2' means that the corresponding node descends from the node
1748 '3' means that the corresponding taxon and node descends from the node
1751 /* copy bipartition n of all different splitpatterns to consbiparts[k] */
1752 void copysplit(uli n, int k)
1758 for (i = 0; i < splitlength; i++) {
1759 z = splitpatterns[n*splitlength + i];
1760 for (j = 0; j < 32 && col < Maxspc; j++) {
1761 if (z & 1) consbiparts[k][col] = '1';
1762 else consbiparts[k][col] = '0';
1769 /* compute majority rule consensus tree */
1770 void makeconsensus()
1772 int i, j, k, size, subnode;
1775 /* sort bipartition frequencies */
1776 qsort(splitfreqs, numbiparts, 2*sizeof(uli), ulicmp);
1777 /* how many bipartitions are included in the consensus tree */
1779 for (i = 0; i < numbiparts && i == consincluded; i++) {
1780 if (2*splitfreqs[2*i] > Numtrial) consincluded = i + 1;
1783 /* collect all info about majority rule consensus tree */
1784 /* the +1 is due to the edge with the root */
1785 consconfid = new_ivector(consincluded + 1);
1786 conssizes = new_ivector(2*consincluded + 2);
1787 consbiparts = new_cmatrix(consincluded + 1, Maxspc);
1789 for (i = 0; i < consincluded; i++) {
1790 /* copy partition to consbiparts */
1791 copysplit(splitfreqs[2*i+1], i);
1792 /* frequency in percent (rounded to integer) */
1793 consconfid[i] = (int) floor(100.0*splitfreqs[2*i]/Numtrial + 0.5);
1794 /* size of partition */
1795 conssizes[2*i] = splitsizes[splitfreqs[2*i+1]];
1796 conssizes[2*i+1] = i;
1798 for (i = 0; i < Maxspc; i++) consbiparts[consincluded][i] = '1';
1799 consbiparts[consincluded][outgroup] = '0';
1800 consconfid[consincluded] = 100;
1801 conssizes[2*consincluded] = Maxspc - 1;
1802 conssizes[2*consincluded + 1] = consincluded;
1804 /* sort bipartitions according to cluster size */
1805 qsort(conssizes, consincluded + 1, 2*sizeof(int), intcmp);
1807 /* reconstruct consensus tree */
1808 for (i = 0; i < consincluded; i++) { /* try every node */
1809 size = conssizes[2*i]; /* size of current node */
1810 for (j = i + 1; j < consincluded + 1; j++) {
1812 /* compare only with nodes with more descendants */
1813 if (size == conssizes[2*j]) continue;
1815 /* check whether node i is a subnode of j */
1817 for (k = 0; k < Maxspc && !subnode; k++) {
1818 chari = consbiparts[ conssizes[2*i+1] ][k];
1820 charj = consbiparts[ conssizes[2*j+1] ][k];
1821 if (chari == charj || charj == '3') subnode = TRUE;
1825 /* if i is a subnode of j change j accordingly */
1827 /* remove subnode i from j */
1828 for (k = 0; k < Maxspc; k++) {
1829 chari = consbiparts[ conssizes[2*i+1] ][k];
1831 charj = consbiparts[ conssizes[2*j+1] ][k];
1833 consbiparts[ conssizes[2*j+1] ][k] = '0';
1834 else if (charj == '3') {
1836 consbiparts[ conssizes[2*j+1] ][k] = '2';
1837 else if (chari == '2')
1838 consbiparts[ conssizes[2*j+1] ][k] = '1';
1840 /* Consensus tree [1] */
1841 FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR H TO DEVELOPERS\n\n\n");
1845 /* Consensus tree [2] */
1846 FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR I TO DEVELOPERS\n\n\n");
1851 /* add link to subnode i in node j */
1852 charj = consbiparts[ conssizes[2*j+1] ][ conssizes[2*i+1] ];
1854 consbiparts[ conssizes[2*j+1] ][ conssizes[2*i+1] ] = '2';
1855 else if (charj == '1')
1856 consbiparts[ conssizes[2*j+1] ][ conssizes[2*i+1] ] = '3';
1858 /* Consensus tree [3] */
1859 FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR J TO DEVELOPERS\n\n\n");
1867 /* prototype for recursion */
1868 void writenode(FILE *treefile, int node);
1870 /* write node (writeconsensustree) */
1871 void writenode(FILE *treefile, int node)
1875 fprintf(treefile, "(");
1877 /* write descending nodes */
1879 for (i = 0; i < Maxspc; i++) {
1880 if (consbiparts[node][i] == '2' ||
1881 consbiparts[node][i] == '3') {
1882 if (first) first = FALSE;
1884 fprintf(treefile, ",");
1889 fprintf(treefile, "\n");
1892 writenode(treefile, i);
1894 /* reliability value as internal label */
1895 fprintf(treefile, "%d", consconfid[i]);
1897 column = column + 3;
1900 /* write descending taxa */
1901 for (i = 0; i < Maxspc; i++) {
1902 if (consbiparts[node][i] == '1' ||
1903 consbiparts[node][i] == '3') {
1904 if (first) first = FALSE;
1906 fprintf(treefile, ",");
1911 fprintf(treefile, "\n");
1913 column += fputid(treefile, i);
1916 fprintf(treefile, ")");
1920 /* write consensus tree */
1921 void writeconsensustree(FILE *treefile)
1926 fprintf(treefile, "(");
1927 column += fputid(treefile, outgroup) + 2;
1928 fprintf(treefile, ",");
1929 /* write descending nodes */
1931 for (i = 0; i < Maxspc; i++) {
1932 if (consbiparts[consincluded][i] == '2' ||
1933 consbiparts[consincluded][i] == '3') {
1934 if (first) first = FALSE;
1936 fprintf(treefile, ",");
1941 fprintf(treefile, "\n");
1944 writenode(treefile, i);
1946 /* reliability value as internal label */
1947 fprintf(treefile, "%d", consconfid[i]);
1949 column = column + 3;
1952 /* write descending taxa */
1953 for (i = 0; i < Maxspc; i++) {
1954 if (consbiparts[consincluded][i] == '1' ||
1955 consbiparts[consincluded][i] == '3') {
1956 if (first) first = FALSE;
1958 fprintf(treefile, ",");
1963 fprintf(treefile, "\n");
1965 column += fputid(treefile, i);
1968 fprintf(treefile, ");\n");
1971 /* prototype for recursion */
1972 void nodecoordinates(int node);
1974 /* establish node coordinates (plotconsensustree) */
1975 void nodecoordinates(int node)
1977 int i, ymin, ymax, xcoordinate;
1979 /* first establish coordinates of descending nodes */
1980 for (i = 0; i < Maxspc; i++) {
1981 if (consbiparts[node][i] == '2' ||
1982 consbiparts[node][i] == '3')
1986 /* then establish coordinates of descending taxa */
1987 for (i = 0; i < Maxspc; i++) {
1988 if (consbiparts[node][i] == '1' ||
1989 consbiparts[node][i] == '3') {
1990 /* y-coordinate of taxon i */
1991 ycortax[i] = ytaxcounter;
1992 ytaxcounter = ytaxcounter - 2;
1996 /* then establish coordinates of this node */
1997 ymin = 2*Maxspc - 2;
2000 for (i = 0; i < Maxspc; i++) {
2001 if (consbiparts[node][i] == '2' ||
2002 consbiparts[node][i] == '3') {
2003 if (ycor[i] > ymax) ymax = ycor[i];
2004 if (ycor[i] < ymin) ymin = ycor[i];
2005 if (xcor[i] > xcoordinate) xcoordinate = xcor[i];
2008 for (i = 0; i < Maxspc; i++) {
2009 if (consbiparts[node][i] == '1' ||
2010 consbiparts[node][i] == '3') {
2011 if (ycortax[i] > ymax) ymax = ycortax[i];
2012 if (ycortax[i] < ymin) ymin = ycortax[i];
2015 ycormax[node] = ymax;
2016 ycormin[node] = ymin;
2017 ycor[node] = (int) floor(0.5*(ymax + ymin) + 0.5);
2018 if (xcoordinate == 0) xcoordinate = 9;
2019 xcor[node] = xcoordinate + 4;
2022 /* prototype for recursion */
2023 void drawnode(int node, int xold);
2025 /* drawnode (plotconsensustree) */
2026 void drawnode(int node, int xold)
2031 /* first draw vertical line */
2032 for (i = ycormin[node] + 1; i < ycormax[node]; i++)
2033 treepict[xcor[node]][i] = ':';
2035 /* then draw descending nodes */
2036 for (i = 0; i < Maxspc; i++) {
2037 if (consbiparts[node][i] == '2' ||
2038 consbiparts[node][i] == '3')
2039 drawnode(i, xcor[node]);
2042 /* then draw descending taxa */
2043 for (i = 0; i < Maxspc; i++) {
2044 if (consbiparts[node][i] == '1' ||
2045 consbiparts[node][i] == '3') {
2046 treepict[xcor[node]][ycortax[i]] = ':';
2047 for (j = xcor[node] + 1; j < xsize-10; j++)
2048 treepict[j][ycortax[i]] = '-';
2049 for (j = 0; j < 10; j++)
2050 treepict[xsize-10+j][ycortax[i]] = Identif[i][j];
2054 /* then draw internal edge with consensus value */
2055 treepict[xold][ycor[node]] = ':';
2056 treepict[xcor[node]][ycor[node]] = ':';
2057 for (i = xold + 1; i < xcor[node]-3; i++)
2058 treepict[i][ycor[node]] = '-';
2059 sprintf(buf, "%d", consconfid[node]);
2060 if (consconfid[node] == 100) {
2061 treepict[xcor[node]-3][ycor[node]] = buf[0];
2062 treepict[xcor[node]-2][ycor[node]] = buf[1];
2063 treepict[xcor[node]-1][ycor[node]] = buf[2];
2065 treepict[xcor[node]-3][ycor[node]] = '-';
2066 treepict[xcor[node]-2][ycor[node]] = buf[0];
2067 treepict[xcor[node]-1][ycor[node]] = buf[1];
2071 /* plot consensus tree */
2072 void plotconsensustree(FILE *plotfp)
2074 int i, j, yroot, startree;
2076 /* star tree or no star tree */
2077 if (consincluded == 0) {
2079 consincluded = 1; /* avoids problems with malloc */
2083 /* memory for x-y-coordinates of each bipartition */
2084 xcor = new_ivector(consincluded);
2085 ycor = new_ivector(consincluded);
2086 ycormax = new_ivector(consincluded);
2087 ycormin = new_ivector(consincluded);
2088 if (startree) consincluded = 0; /* avoids problems with malloc */
2090 /* y-coordinates of each taxon */
2091 ycortax = new_ivector(Maxspc);
2092 ycortax[outgroup] = 0;
2094 /* establish coordinates */
2095 ytaxcounter = 2*Maxspc - 2;
2097 /* first establish coordinates of descending nodes */
2098 for (i = 0; i < Maxspc; i++) {
2099 if (consbiparts[consincluded][i] == '2' ||
2100 consbiparts[consincluded][i] == '3')
2104 /* then establish coordinates of descending taxa */
2105 for (i = 0; i < Maxspc; i++) {
2106 if (consbiparts[consincluded][i] == '1' ||
2107 consbiparts[consincluded][i] == '3') {
2108 /* y-coordinate of taxon i */
2109 ycortax[i] = ytaxcounter;
2110 ytaxcounter = ytaxcounter - 2;
2114 /* then establish length of root edge and size of whole tree */
2117 for (i = 0; i < Maxspc; i++) {
2118 if (consbiparts[consincluded][i] == '2' ||
2119 consbiparts[consincluded][i] == '3') {
2120 if (ycor[i] > yroot) yroot = ycor[i];
2121 if (xcor[i] > xsize) xsize = xcor[i];
2124 for (i = 0; i < Maxspc; i++) {
2125 if (consbiparts[consincluded][i] == '1' ||
2126 consbiparts[consincluded][i] == '3') {
2127 if (ycortax[i] > yroot) yroot = ycortax[i];
2130 if (xsize == 0) xsize = 9;
2131 /* size in x direction inclusive one blank on the left */
2134 /* change all x-labels so that (0,0) is down-left */
2135 for (i = 0; i < consincluded; i++)
2136 xcor[i] = xsize-1-xcor[i];
2139 treepict = new_cmatrix(xsize, 2*Maxspc-1);
2140 for (i = 0; i < xsize; i++)
2141 for (j = 0; j < 2*Maxspc-1; j++)
2142 treepict[i][j] = ' ';
2145 for (i = 1; i < yroot; i++)
2146 treepict[1][i] = ':';
2147 treepict[1][0] = ':';
2148 for (i = 2; i < xsize - 10; i++)
2149 treepict[i][0] = '-';
2150 for (i = 0; i < 10; i++)
2151 treepict[xsize-10+i][0] = Identif[outgroup][i];
2153 /* then draw descending nodes */
2154 for (i = 0; i < Maxspc; i++) {
2155 if (consbiparts[consincluded][i] == '2' ||
2156 consbiparts[consincluded][i] == '3')
2160 /* then draw descending taxa */
2161 for (i = 0; i < Maxspc; i++) {
2162 if (consbiparts[consincluded][i] == '1' ||
2163 consbiparts[consincluded][i] == '3') {
2164 treepict[1][ycortax[i]] = ':';
2165 for (j = 2; j < xsize-10; j++)
2166 treepict[j][ycortax[i]] = '-';
2167 for (j = 0; j < 10; j++)
2168 treepict[xsize-10+j][ycortax[i]] = Identif[i][j];
2173 for (i = 2*Maxspc-2; i > -1; i--) {
2174 for (j = 0; j < xsize; j++)
2175 fputc(treepict[j][i], plotfp);
2176 fputc('\n', plotfp);
2181 free_ivector(ycormax);
2182 free_ivector(ycormin);
2183 free_ivector(ycortax);
2184 free_cmatrix(treepict);
2189 /******************************************************************************/
2190 /* storing and evaluating quartet branching information */
2191 /******************************************************************************/
2195 for a quartet with the taxa a, b, c, d there are
2196 three possible binary trees:
2202 For every quartet information about its branching structure is
2203 stored. With the functions readquartet and writequartet
2204 this information can be accessed. For every quartet (a,b,c,d)
2205 with a < b < c < d (taxa) the branching information is encoded
2209 +-------------+-------------+-------------+-------------+
2210 | not used | tree 3 | tree 2 | tree 1 |
2211 +-------------+-------------+-------------+-------------+
2213 If the branching structure of the taxa corresponds to one of the
2214 three trees the corresponding bit is set. If the branching structure
2215 is unclear because two of the three trees have the same maximum
2216 likelihood value the corresponding two bits are set. If the branching
2217 structure is completely unknown all the bits are set (the highest
2218 bit is always cleared because it is not used).
2222 /* allocate memory for quartets */
2223 unsigned char *mallocquartets(int taxa)
2226 unsigned char *qinfo;
2228 /* compute number of quartets */
2229 Numquartets = (uli) taxa*(taxa-1)*(taxa-2)*(taxa-3)/24;
2230 if (Numquartets % 2 == 0) { /* even number */
2231 numch = Numquartets/2;
2232 } else { /* odd number */
2233 numch = (Numquartets + 1)/2;
2235 /* allocate memory */
2236 qinfo = (unsigned char *) malloc(numch * sizeof(unsigned char) );
2237 if (qinfo == NULL) maerror("quartetinfo in mallocquartets");
2238 for (nc = 0; nc < numch; nc++) qinfo[nc] = 0;
2242 /* free quartet memory */
2248 /* read quartet info - a < b < c < d */
2249 unsigned char readquartet(int a, int b, int c, int d)
2255 + (uli) c*(c-1)*(c-2)/6
2256 + (uli) d*(d-1)*(d-2)*(d-3)/24;
2257 if (qnum % 2 == 0) { /* even number */
2259 return (quartetinfo[qnum/2] & (unsigned char) 15);
2260 } else { /* odd number */
2262 return ((quartetinfo[(qnum-1)/2] & (unsigned char) 240)>>4);
2266 /* write quartet info - a < b < c < d, 0 <= info <= 15 */
2267 void writequartet(int a, int b, int c, int d, unsigned char info)
2273 + (uli) c*(c-1)*(c-2)/6
2274 + (uli) d*(d-1)*(d-2)*(d-3)/24;
2275 if (qnum % 2 == 0) { /* even number */
2277 quartetinfo[qnum/2] =
2278 ((quartetinfo[qnum/2] & (unsigned char) 240) |
2279 (info & (unsigned char) 15));
2280 } else { /* odd number */
2282 quartetinfo[(qnum-1)/2] =
2283 ((quartetinfo[(qnum-1)/2] & (unsigned char) 15) |
2284 ((info & (unsigned char) 15)<<4));
2289 void openfiletowrite(FILE **, char[], char[]);
2290 void closefile(FILE *);
2292 /* sorts three doubles in descending order */
2293 void sort3doubles(dvector num, ivector order)
2295 if (num[0] > num[1]) {
2296 if(num[2] > num[0]) {
2300 } else if (num[2] < num[1]) {
2310 if(num[2] > num[1]) {
2314 } else if (num[2] < num[0]) {
2326 /* checks out all possible quartets */
2327 void computeallquartets()
2331 unsigned char treebits[3];
2335 double qc2, mintogo, minutes, hours, temp;
2336 double temp1, temp2, temp3;
2337 unsigned char discreteweight[3];
2341 treebits[0] = (unsigned char) 1;
2342 treebits[1] = (unsigned char) 2;
2343 treebits[2] = (unsigned char) 4;
2345 if (show_optn) { /* list all unresolved quartets */
2346 openfiletowrite(&unresfp, UNRESOLVED, "unresolved quartet trees");
2347 fprintf(unresfp, "List of all completely unresolved quartets:\n\n");
2353 /* start timer - percentage of completed quartets */
2369 initsched(&sched, numquarts(Maxspc), PP_NumProcs-1, 4);
2370 qamount=sgss(&sched);
2371 while (qamount > 0) {
2372 if (PP_emptyslave()) {
2373 PP_RecvQuartBlock(0, &sq, &noq, quartetinfo, &apr);
2376 dest = PP_getslave();
2377 PP_SendDoQuartBlock(dest, qaddr, qamount, (approxqp ? APPROX : EXACT));
2378 qblocksent += qamount;
2380 qamount=sgss(&sched);
2382 MPI_Iprobe(MPI_ANY_SOURCE, PP_QUARTBLOCKSPECS, PP_Comm, &flag, &stat);
2384 PP_RecvQuartBlock(0, &sq, &noq, quartetinfo, &apr);
2386 MPI_Iprobe(MPI_ANY_SOURCE, PP_QUARTBLOCKSPECS, PP_Comm, &flag, &stat);
2389 while (qblocksent > 0) {
2390 PP_RecvQuartBlock(0, &sq, &noq, quartetinfo, &apr);
2394 # else /* PARALLEL */
2396 addtimes(GENERAL, &tarr);
2397 if (savequartlh_optn) {
2398 openfiletowrite(&lhfp, ALLQUARTLH, "all quartet likelihoods");
2399 if (saveqlhbin_optn) writetpqfheader(Maxspc, lhfp, 3);
2400 else writetpqfheader(Maxspc, lhfp, 4);
2403 for (i = 3; i < Maxspc; i++)
2404 for (c = 2; c < i; c++)
2405 for (b = 1; b < c; b++)
2406 for (a = 0; a < b; a++) {
2409 /* generate message every 15 minutes */
2412 if ( (time2 - time1) > 900) {
2413 /* every 900 seconds */
2414 /* percentage of completed quartets */
2416 FPRINTF(STDOUTFILE "\n");
2419 qc2 = 100.*nq/Numquartets;
2420 mintogo = (100.0-qc2) *
2421 (double) (time2-time0)/60.0/qc2;
2422 hours = floor(mintogo/60.0);
2423 minutes = mintogo - 60.0*hours;
2424 FPRINTF(STDOUTFILE "%.2f%%", qc2);
2425 FPRINTF(STDOUTFILE " completed (remaining");
2426 FPRINTF(STDOUTFILE " time: %.0f", hours);
2427 FPRINTF(STDOUTFILE " hours %.0f", minutes);
2428 FPRINTF(STDOUTFILE " minutes)\n");
2433 /* maximum likelihood values */
2435 /* exact or approximate maximum likelihood values */
2436 compute_quartlklhds(a,b,c,i,&qweight[0],&qweight[1],&qweight[2], (approxqp ? APPROX : EXACT));
2438 if (savequartlh_optn) {
2439 if (saveqlhbin_optn)
2440 fwrite(qweight, sizeof(double), 3, lhfp);
2442 fprintf(lhfp, "(%d,%d,%d,%d)\t%f\t%f\t%f\n", a, b, c, i,
2443 qweight[0], qweight[1], qweight[2]);
2446 /* sort in descending order */
2447 sort3doubles(qweight, qworder);
2449 if (usebestq_optn) {
2451 discreteweight[sqorder[2]] = treebits[qworder[0]];
2452 if (qweight[qworder[0]] == qweight[qworder[1]]) {
2453 discreteweight[sqorder[2]] = discreteweight[sqorder[2]] || treebits[qworder[1]];
2454 if (qweight[qworder[1]] == qweight[qworder[2]]) {
2455 discreteweight[sqorder[2]] = discreteweight[sqorder[2]] || treebits[qworder[2]];
2456 discreteweight[sqorder[2]] = 7;
2461 /* compute Bayesian weights */
2462 qweight[qworder[1]] = exp(qweight[qworder[1]]-qweight[qworder[0]]);
2463 qweight[qworder[2]] = exp(qweight[qworder[2]]-qweight[qworder[0]]);
2464 qweight[qworder[0]] = 1.0;
2465 temp = qweight[0] + qweight[1] + qweight[2];
2466 qweight[0] = qweight[0]/temp;
2467 qweight[1] = qweight[1]/temp;
2468 qweight[2] = qweight[2]/temp;
2470 /* square deviations */
2471 temp1 = 1.0 - qweight[qworder[0]];
2472 sqdiff[0] = temp1 * temp1 +
2473 qweight[qworder[1]] * qweight[qworder[1]] +
2474 qweight[qworder[2]] * qweight[qworder[2]];
2475 discreteweight[0] = treebits[qworder[0]];
2477 temp1 = 0.5 - qweight[qworder[0]];
2478 temp2 = 0.5 - qweight[qworder[1]];
2479 sqdiff[1] = temp1 * temp1 + temp2 * temp2 +
2480 qweight[qworder[2]] * qweight[qworder[2]];
2481 discreteweight[1] = treebits[qworder[0]] + treebits[qworder[1]];
2483 temp1 = onethird - qweight[qworder[0]];
2484 temp2 = onethird - qweight[qworder[1]];
2485 temp3 = onethird - qweight[qworder[2]];
2486 sqdiff[2] = temp1 * temp1 + temp2 * temp2 + temp3 * temp3;
2487 discreteweight[2] = (unsigned char) 7;
2489 /* sort in descending order */
2490 sort3doubles(sqdiff, sqorder);
2493 /* determine best discrete weight */
2494 writequartet(a, b, c, i, discreteweight[sqorder[2]]);
2496 /* counting completely unresolved quartets */
2497 if (discreteweight[sqorder[2]] == 7) {
2504 fputid10(unresfp, a);
2505 fprintf(unresfp, " ");
2506 fputid10(unresfp, b);
2507 fprintf(unresfp, " ");
2508 fputid10(unresfp, c);
2509 fprintf(unresfp, " ");
2511 fprintf(unresfp, "\n");
2514 addtimes(QUARTETS, &tarr);
2516 if (savequartlh_optn) {
2522 FPRINTF(STDOUTFILE "\n");
2523 # endif /* PARALLEL */
2527 /* check the branching structure between the leaves (not the taxa!)
2528 A, B, C, and I (A, B, C, I don't need to be ordered). As a result,
2529 the two leaves that are closer related to each other than to leaf I
2530 are found in chooseA and chooseB. If the branching structure is
2531 not uniquely defined, ChooseA and ChooseB are chosen randomly
2532 from the possible taxa */
2533 void checkquartet(int A, int B, int C, int I)
2535 int i, j, a, b, taxon[5], leaf[5], ipos;
2536 unsigned char qresult;
2537 int notunique = FALSE;
2539 /* The relationship between leaves and taxa is defined by trueID */
2540 taxon[1] = trueID[A]; /* taxon number */
2541 leaf[1] = A; /* leaf number */
2542 taxon[2] = trueID[B];
2544 taxon[3] = trueID[C];
2546 taxon[4] = trueID[I];
2550 /* Source: Numerical Recipes (PIKSR2.C) */
2551 for (j = 2; j <= 4; j++) {
2555 while (i > 0 && taxon[i] > a) {
2556 taxon[i+1] = taxon[i];
2557 leaf[i+1] = leaf[i];
2564 /* where is leaf I ? */
2566 while (leaf[ipos] != I) ipos++;
2568 /* look at sequence quartet */
2569 qresult = readquartet(taxon[1], taxon[2], taxon[3], taxon[4]);
2571 /* chooseA and chooseB */
2575 /* one single branching structure */
2578 case 1: if (ipos == 1 || ipos == 2) {
2589 case 2: if (ipos == 1 || ipos == 3) {
2600 case 4: if (ipos == 1 || ipos == 4) {
2610 /* two possible branching structures */
2613 case 3: if (randominteger(2)) qresult = 1;
2619 case 5: if (randominteger(2)) qresult = 1;
2625 case 6: if (randominteger(2)) qresult = 2;
2630 /* three possible branching structures */
2633 case 7: qresult = (1 << randominteger(3)); /* 1, 2, or 4 */
2637 default: /* Program error [checkquartet] */
2639 FPRINTF(STDOUTFILE "\n\n\n(%2d)HALT: PLEASE REPORT ERROR K-PARALLEL TO DEVELOPERS (%d,%d,%d,%d) = %ld\n\n\n",
2640 PP_Myid, taxon[1], taxon[2], taxon[3], taxon[4],
2641 quart2num(taxon[1], taxon[2], taxon[3], taxon[4]));
2643 FPRINTF(STDOUTFILE "\n\n\nHALT: PLEASE REPORT ERROR K TO DEVELOPERS\n\n\n");
2647 } while (notunique);