5 /* version 3.6. (c) Copyright 1993-2004 by the University of Washington.
6 Written by Joseph Felsenstein, Akiko Fuseki, Sean Lamont, and Andrew Keeffe.
7 Permission is granted to copy and use this program provided no fee is
8 charged for it and provided that this copyright notice is not removed. */
10 #define maxtrees 100 /* maximum number of tied trees stored */
13 universal, ciliate, mito, vertmito, flymito, yeastmito
16 /* nodes will form a binary tree */
24 /* function prototypes */
25 void protgnu(gseq **);
26 void protchuck(gseq *);
29 void getoptions(void);
30 void protalloctree(void);
33 void protinputdata(void);
35 void protmakevalues(void);
37 void protfillin(node *, node *, node *);
38 void protpreorder(node *);
39 void protadd(node *, node *, node *);
40 void protre_move(node **, node **);
41 void evaluate(node *);
42 void protpostorder(node *);
43 void protreroot(node *);
44 void protsavetraverse(node *, long *, boolean *);
46 void protsavetree(long *, boolean *);
47 void tryadd(node *, node **, node **);
48 void addpreorder(node *, node *, node *);
49 void tryrearr(node *, boolean *);
50 void repreorder(node *, boolean *);
51 void rearrange(node **);
52 void protgetch(Char *);
53 void protaddelement(node **, long *, long *, boolean *);
54 void prottreeread(void);
55 void protancestset(long *, long *, long *, long *, long *);
57 void prothyprint(long , long , boolean *, node *, boolean *, boolean *);
58 void prothyptrav(node *, sitearray *, long, long, long *, boolean *,
60 void prothypstates(long *);
63 void reallocnode(node* p);
64 void reallocchars(void);
65 /* function prototypes */
70 Char infilename[FNMLNGTH], outfilename[FNMLNGTH], intreename[FNMLNGTH], outtreename[FNMLNGTH], weightfilename[FNMLNGTH];
72 long chars, col, msets, ith, njumble, jumb;
73 /* chars = number of sites in actual sequences */
75 boolean jumble, usertree, weights, thresh, trout, progress, stepbox,
76 justwts, ancseq, mulsets, firstset;
78 long fullset, fulldel;
79 pointarray treenode; /* pointers to all nodes in tree */
84 sitearray translate[(long)quest - (long)ala + 1];
95 /* Local variables for maketree, propagated globally for c version: */
97 double like, bestyet, bestlike, minsteps, bstlike2;
98 boolean lastrearr, recompute;
100 double nsteps[maxuser];
105 void protgnu(gseq **p)
107 /* this and the following are do-it-yourself garbage collectors.
108 Make a new node or pull one off the garbage list */
109 if (garbage != NULL) {
112 (*p)->seq = (seqptr)Malloc(chars*sizeof(sitearray));
113 garbage = garbage->next;
115 *p = (gseq *)Malloc(sizeof(gseq));
116 (*p)->seq = (seqptr)Malloc(chars*sizeof(sitearray));
122 void protchuck(gseq *p)
124 /* collect garbage on p -- put it on front of garbage list */
132 /* make up table of the code 1 = u, 2 = c, 3 = a, 4 = g */
133 trans[0][0][0] = phe;
134 trans[0][0][1] = phe;
135 trans[0][0][2] = leu;
136 trans[0][0][3] = leu;
137 trans[0][1][0] = ser1;
138 trans[0][1][1] = ser1;
139 trans[0][1][2] = ser1;
140 trans[0][1][3] = ser1;
141 trans[0][2][0] = tyr;
142 trans[0][2][1] = tyr;
143 trans[0][2][2] = stop;
144 trans[0][2][3] = stop;
145 trans[0][3][0] = cys;
146 trans[0][3][1] = cys;
147 trans[0][3][2] = stop;
148 trans[0][3][3] = trp;
149 trans[1][0][0] = leu;
150 trans[1][0][1] = leu;
151 trans[1][0][2] = leu;
152 trans[1][0][3] = leu;
153 trans[1][1][0] = pro;
154 trans[1][1][1] = pro;
155 trans[1][1][2] = pro;
156 trans[1][1][3] = pro;
157 trans[1][2][0] = his;
158 trans[1][2][1] = his;
159 trans[1][2][2] = gln;
160 trans[1][2][3] = gln;
161 trans[1][3][0] = arg;
162 trans[1][3][1] = arg;
163 trans[1][3][2] = arg;
164 trans[1][3][3] = arg;
165 trans[2][0][0] = ileu;
166 trans[2][0][1] = ileu;
167 trans[2][0][2] = ileu;
168 trans[2][0][3] = met;
169 trans[2][1][0] = thr;
170 trans[2][1][1] = thr;
171 trans[2][1][2] = thr;
172 trans[2][1][3] = thr;
173 trans[2][2][0] = asn;
174 trans[2][2][1] = asn;
175 trans[2][2][2] = lys;
176 trans[2][2][3] = lys;
177 trans[2][3][0] = ser2;
178 trans[2][3][1] = ser2;
179 trans[2][3][2] = arg;
180 trans[2][3][3] = arg;
181 trans[3][0][0] = val;
182 trans[3][0][1] = val;
183 trans[3][0][2] = val;
184 trans[3][0][3] = val;
185 trans[3][1][0] = ala;
186 trans[3][1][1] = ala;
187 trans[3][1][2] = ala;
188 trans[3][1][3] = ala;
189 trans[3][2][0] = asp;
190 trans[3][2][1] = asp;
191 trans[3][2][2] = glu;
192 trans[3][2][3] = glu;
193 trans[3][3][0] = gly;
194 trans[3][3][1] = gly;
195 trans[3][3][2] = gly;
196 trans[3][3][3] = gly;
197 if (whichcode == mito)
198 trans[0][3][2] = trp;
199 if (whichcode == vertmito) {
200 trans[0][3][2] = trp;
201 trans[2][3][2] = stop;
202 trans[2][3][3] = stop;
203 trans[2][0][2] = met;
205 if (whichcode == flymito) {
206 trans[0][3][2] = trp;
207 trans[2][0][2] = met;
208 trans[2][3][2] = ser2;
210 if (whichcode == yeastmito) {
211 trans[0][3][2] = trp;
212 trans[1][0][2] = thr;
213 trans[2][0][2] = met;
220 /* set up set table to get aasets from aas */
224 for (a = ala; (long)a <= (long)stop; a = (aas)((long)a + 1)) {
225 translate[(long)a - (long)ala][0] = 1L << ((long)a);
226 translate[(long)a - (long)ala][1] = 1L << ((long)a);
228 for (i = 0; i <= 3; i++) {
229 for (j = 0; j <= 3; j++) {
230 for (k = 0; k <= 3; k++) {
231 for (l = 0; l <= 3; l++) {
232 translate[(long)trans[i][j][k]][1] |= (1L << (long)trans[l][j][k]);
233 translate[(long)trans[i][j][k]][1] |= (1L << (long)trans[i][l][k]);
234 translate[(long)trans[i][j][k]][1] |= (1L << (long)trans[i][j][l]);
239 translate[(long)del - (long)ala][1] = 1L << ((long)del);
240 fulldel = (1L << ((long)stop + 1)) - (1L << ((long)ala));
241 fullset = fulldel & (~(1L << ((long)del)));
242 translate[(long)asx - (long)ala][0]
243 = (1L << ((long)asn)) | (1L << ((long)asp));
244 translate[(long)glx - (long)ala][0]
245 = (1L << ((long)gln)) | (1L << ((long)glu));
246 translate[(long)ser - (long)ala][0]
247 = (1L << ((long)ser1)) | (1L << ((long)ser2));
248 translate[(long)unk - (long)ala][0] = fullset;
249 translate[(long)quest - (long)ala][0] = fulldel;
250 translate[(long)asx - (long)ala][1] = translate[(long)asn - (long)ala][1]
251 | translate[(long)asp - (long)ala][1];
252 translate[(long)glx - (long)ala][1] = translate[(long)gln - (long)ala][1]
253 | translate[(long)glu - (long)ala][1];
254 translate[(long)ser - (long)ala][1] = translate[(long)ser1 - (long)ala][1]
255 | translate[(long)ser2 - (long)ala][1];
256 translate[(long)unk - (long)ala][1] = fullset;
257 translate[(long)quest - (long)ala][1] = fulldel;
258 for (a = ala; (long)a <= (long)quest; a = (aas)((long)a + 1)) {
260 for (b = ala; (long)b <= (long)stop; b = (aas)((long)b + 1)) {
261 if (((1L << ((long)b)) & translate[(long)a - (long)ala][1]) != 0)
262 s |= translate[(long)b - (long)ala][1];
264 translate[(long)a - (long)ala][2] = s;
271 /* interactively set options */
272 long loopcount, loopcount2;
275 fprintf(outfile, "\nProtein parsimony algorithm, version %s\n\n",VERSION);
285 whichcode = universal;
296 printf("\nProtein parsimony algorithm, version %s\n\n",VERSION);
297 printf("Setting for this run:\n");
298 printf(" U Search for best tree? %s\n",
299 (usertree ? "No, use user trees in input file" : "Yes"));
301 printf(" J Randomize input order of sequences?");
303 printf(" Yes (seed =%8ld,%3ld times)\n", inseed0, njumble);
305 printf(" No. Use input order\n");
307 printf(" O Outgroup root?");
309 printf(" Yes, at sequence number%3ld\n", outgrno);
311 printf(" No, use as outgroup species%3ld\n", outgrno);
312 printf(" T Use Threshold parsimony?");
314 printf(" Yes, count steps up to%4.1f per site\n", threshold);
316 printf(" No, use ordinary parsimony\n");
317 printf(" C Use which genetic code? %s\n",
318 (whichcode == universal) ? "Universal" :
319 (whichcode == ciliate) ? "Ciliate" :
320 (whichcode == mito) ? "Universal mitochondrial" :
321 (whichcode == vertmito) ? "Vertebrate mitochondrial" :
322 (whichcode == flymito) ? "Fly mitochondrial" :
323 (whichcode == yeastmito) ? "Yeast mitochondrial" : "");
324 printf(" W Sites weighted? %s\n",
325 (weights ? "Yes" : "No"));
326 printf(" M Analyze multiple data sets?");
328 printf(" Yes, %2ld %s\n", msets,
329 (justwts ? "sets of weights" : "data sets"));
332 printf(" I Input sequences interleaved? %s\n",
333 (interleaved ? "Yes" : "No"));
334 printf(" 0 Terminal type (IBM PC, ANSI, none)? %s\n",
335 (ibmpc ? "IBM PC" : ansi ? "ANSI" : "(none)"));
336 printf(" 1 Print out the data at start of run %s\n",
337 (printdata ? "Yes" : "No"));
338 printf(" 2 Print indications of progress of run %s\n",
339 (progress ? "Yes" : "No"));
340 printf(" 3 Print out tree %s\n",
341 (treeprint ? "Yes" : "No"));
342 printf(" 4 Print out steps in each site %s\n",
343 (stepbox ? "Yes" : "No"));
344 printf(" 5 Print sequences at all nodes of tree %s\n",
345 (ancseq ? "Yes" : "No"));
346 if (ancseq || printdata)
347 printf(" . Use dot-differencing to display them %s\n",
348 dotdiff ? "Yes" : "No");
349 printf(" 6 Write out trees onto tree file? %s\n",
350 (trout ? "Yes" : "No"));
351 if(weights && justwts){
353 "WARNING: W option and Multiple Weights options are both on. ");
355 "The W menu option is unnecessary and has no additional effect. \n");
358 "\nAre these settings correct? (type Y or the letter for one to change)\n");
359 scanf("%c%*[^\n]", &ch);
364 if (strchr("WCJOTUMI12345.60",ch) != NULL) {
370 initjumble(&inseed, &inseed0, seed, &njumble);
379 outgropt = !outgropt;
381 initoutgroup(&outgrno, spp);
388 initthreshold(&threshold);
392 printf("\nWhich genetic code?\n");
393 printf(" type for\n\n");
394 printf(" U Universal\n");
395 printf(" M Mitochondrial\n");
396 printf(" V Vertebrate mitochondrial\n");
397 printf(" F Fly mitochondrial\n");
398 printf(" Y Yeast mitochondrial\n\n");
401 printf("type U, M, V, F, or Y\n");
402 scanf("%c%*[^\n]", &ch);
407 countup(&loopcount2, 10);
408 } while (ch != 'U' && ch != 'M' && ch != 'V'
409 && ch != 'F' && ch != 'Y');
413 whichcode = universal;
421 whichcode = vertmito;
429 whichcode = yeastmito;
437 printf("Multiple data sets or multiple weights?");
440 printf(" (type D or W)\n");
442 phyFillScreenColor();
444 scanf("%c%*[^\n]", &ch2);
449 countup(&loopcount2, 10);
450 } while ((ch2 != 'W') && (ch2 != 'D'));
451 justwts = (ch2 == 'W');
455 initdatasets(&msets);
458 initjumble(&inseed, &inseed0, seed, &njumble);
464 interleaved = !interleaved;
468 usertree = !usertree;
472 initterminal(&ibmpc, &ansi);
476 printdata = !printdata;
480 progress = !progress;
484 treeprint = !treeprint;
504 printf("Not a possible option!\n");
505 countup(&loopcount, 100);
511 { /* allocate treenode dynamically */
515 treenode = (pointarray)Malloc(nonodes*sizeof(node *));
516 for (i = 0; i < (spp); i++) {
517 treenode[i] = (node *)Malloc(sizeof(node));
518 treenode[i]->numsteps = (steptr)Malloc(chars*sizeof(long));
519 treenode[i]->siteset = (seqptr)Malloc(chars*sizeof(sitearray));
520 treenode[i]->seq = (aas *)Malloc(chars*sizeof(aas));
522 for (i = spp; i < (nonodes); i++) {
524 for (j = 1; j <= 3; j++) {
525 p = (node *)Malloc(sizeof(node));
526 p->numsteps = (steptr)Malloc(chars*sizeof(long));
527 p->siteset = (seqptr)Malloc(chars*sizeof(sitearray));
528 p->seq = (aas *)Malloc(chars*sizeof(aas));
532 p->next->next->next = p;
535 } /* protalloctree */
538 void reallocnode(node* p)
543 p->numsteps = (steptr)Malloc(chars*sizeof(long));
544 p->siteset = (seqptr)Malloc(chars*sizeof(sitearray));
545 p->seq = (aas *)Malloc(chars*sizeof(aas));
549 void reallocchars(void)
550 { /* reallocates variables that are dependand on the number of chars
551 * do we need to reallocate the garbage list too? */
556 for (i = 0; i < maxuser; i++) {
558 fsteps[i] = (long *)Malloc(chars*sizeof(long));
561 for (i = 0; i < nonodes; i++) {
562 reallocnode(treenode[i]);
565 while (p != treenode[i]) {
574 free(temp->numsteps);
577 free(temp1->numsteps);
578 free(temp1->siteset);
581 weight = (steptr)Malloc(chars*sizeof(long));
582 threshwt = (steptr)Malloc(chars*sizeof(long));
583 temp->numsteps = (steptr)Malloc(chars*sizeof(long));
584 temp->siteset = (seqptr)Malloc(chars*sizeof(sitearray));
585 temp->seq = (aas *)Malloc(chars*sizeof(aas));
586 temp1->numsteps = (steptr)Malloc(chars*sizeof(long));
587 temp1->siteset = (seqptr)Malloc(chars*sizeof(sitearray));
588 temp1->seq = (aas *)Malloc(chars*sizeof(aas));
593 { /* allocate remaining global arrays and variables dynamically */
597 fsteps = (long **)Malloc(maxuser*sizeof(long *));
598 for (i = 0; i < maxuser; i++)
599 fsteps[i] = (long *)Malloc(chars*sizeof(long));
601 bestrees = (bestelm *)Malloc(maxtrees*sizeof(bestelm));
602 for (i = 1; i <= maxtrees; i++)
603 bestrees[i - 1].btree = (long *)Malloc(spp*sizeof(long));
604 nayme = (naym *)Malloc(spp*sizeof(naym));
605 enterorder = (long *)Malloc(spp*sizeof(long));
606 place = (long *)Malloc(nonodes*sizeof(long));
607 weight = (steptr)Malloc(chars*sizeof(long));
608 threshwt = (steptr)Malloc(chars*sizeof(long));
609 temp = (node *)Malloc(sizeof(node));
610 temp->numsteps = (steptr)Malloc(chars*sizeof(long));
611 temp->siteset = (seqptr)Malloc(chars*sizeof(sitearray));
612 temp->seq = (aas *)Malloc(chars*sizeof(aas));
613 temp1 = (node *)Malloc(sizeof(node));
614 temp1->numsteps = (steptr)Malloc(chars*sizeof(long));
615 temp1->siteset = (seqptr)Malloc(chars*sizeof(sitearray));
616 temp1->seq = (aas *)Malloc(chars*sizeof(aas));
622 /* initializes variables */
624 inputnumbers(&spp, &chars, &nonodes, 1);
627 fprintf(outfile, "%2ld species, %3ld sites\n\n", spp, chars);
635 /* input the names and sequences for each species */
636 long i, j, k, l, aasread, aasnew = 0;
638 boolean allread, done;
639 aas aa; /* temporary amino acid for input */
642 headings(chars, "Sequences", "---------");
646 /* eat white space -- if the separator line has spaces on it*/
648 charstate = gettc(infile);
649 } while (charstate == ' ' || charstate == '\t');
650 ungetc(charstate, infile);
656 if ((interleaved && aasread == 0) || !interleaved)
658 j = interleaved ? aasread : 0;
660 while (!done && !eoff(infile)) {
663 while (j < chars && !(eoln(infile) || eoff(infile))) {
664 charstate = gettc(infile);
665 if (charstate == '\n' || charstate == '\t')
667 if (charstate == ' ' || (charstate >= '0' && charstate <= '9'))
669 uppercase(&charstate);
670 if ((!isalpha(charstate) && charstate != '?' &&
671 charstate != '-' && charstate != '*') || charstate == 'J' ||
672 charstate == 'O' || charstate == 'U') {
673 printf("WARNING -- BAD AMINO ACID:%c",charstate);
674 printf(" AT POSITION%5ld OF SPECIES %3ld\n",j,i);
678 aa = (charstate == 'A') ? ala :
679 (charstate == 'B') ? asx :
680 (charstate == 'C') ? cys :
681 (charstate == 'D') ? asp :
682 (charstate == 'E') ? glu :
683 (charstate == 'F') ? phe :
684 (charstate == 'G') ? gly : aa;
685 aa = (charstate == 'H') ? his :
686 (charstate == 'I') ? ileu :
687 (charstate == 'K') ? lys :
688 (charstate == 'L') ? leu :
689 (charstate == 'M') ? met :
690 (charstate == 'N') ? asn :
691 (charstate == 'P') ? pro :
692 (charstate == 'Q') ? gln :
693 (charstate == 'R') ? arg : aa;
694 aa = (charstate == 'S') ? ser :
695 (charstate == 'T') ? thr :
696 (charstate == 'V') ? val :
697 (charstate == 'W') ? trp :
698 (charstate == 'X') ? unk :
699 (charstate == 'Y') ? tyr :
700 (charstate == 'Z') ? glx :
701 (charstate == '*') ? stop :
702 (charstate == '?') ? quest:
703 (charstate == '-') ? del : aa;
705 treenode[i - 1]->seq[j - 1] = aa;
706 memcpy(treenode[i - 1]->siteset[j - 1],
707 translate[(long)aa - (long)ala], sizeof(sitearray));
716 if (interleaved && i == 1)
719 if ((interleaved && j != aasnew) || ((!interleaved) && j != chars)){
720 printf("ERROR: SEQUENCES OUT OF ALIGNMENT\n");
726 allread = (aasread == chars);
731 for (i = 1; i <= ((chars - 1) / 60 + 1); i++) {
732 for (j = 1; j <= (spp); j++) {
733 for (k = 0; k < nmlngth; k++)
734 putc(nayme[j - 1][k], outfile);
735 fprintf(outfile, " ");
739 for (k = (i - 1) * 60 + 1; k <= l; k++) {
740 if (j > 1 && treenode[j - 1]->seq[k - 1] == treenode[0]->seq[k - 1])
743 tmpa = treenode[j-1]->seq[k-1];
744 charstate = (tmpa == ala) ? 'A' :
745 (tmpa == asx) ? 'B' :
746 (tmpa == cys) ? 'C' :
747 (tmpa == asp) ? 'D' :
748 (tmpa == glu) ? 'E' :
749 (tmpa == phe) ? 'F' :
750 (tmpa == gly) ? 'G' :
751 (tmpa == his) ? 'H' :
752 (tmpa ==ileu) ? 'I' :
753 (tmpa == lys) ? 'K' :
754 (tmpa == leu) ? 'L' : charstate;
755 charstate = (tmpa == met) ? 'M' :
756 (tmpa == asn) ? 'N' :
757 (tmpa == pro) ? 'P' :
758 (tmpa == gln) ? 'Q' :
759 (tmpa == arg) ? 'R' :
760 (tmpa == ser) ? 'S' :
761 (tmpa ==ser1) ? 'S' :
762 (tmpa ==ser2) ? 'S' : charstate;
763 charstate = (tmpa == thr) ? 'T' :
764 (tmpa == val) ? 'V' :
765 (tmpa == trp) ? 'W' :
766 (tmpa == unk) ? 'X' :
767 (tmpa == tyr) ? 'Y' :
768 (tmpa == glx) ? 'Z' :
769 (tmpa == del) ? '-' :
770 (tmpa ==stop) ? '*' :
771 (tmpa==quest) ? '?' : charstate;
773 putc(charstate, outfile);
774 if (k % 10 == 0 && k % 60 != 0)
784 } /* protinputdata */
787 void protmakevalues()
789 /* set up fractional likelihoods at tips */
793 for (i = 1; i <= nonodes; i++) {
794 treenode[i - 1]->back = NULL;
795 treenode[i - 1]->tip = (i <= spp);
796 treenode[i - 1]->index = i;
797 for (j = 0; j < (chars); j++)
798 treenode[i - 1]->numsteps[j] = 0;
800 p = treenode[i - 1]->next;
801 while (p != treenode[i - 1]) {
805 for (j = 0; j < (chars); j++)
811 } /* protmakevalues */
816 /* reads the input data */
822 for (i = 0; i < chars; i++)
824 inputweights(chars, weight, &weights);
826 fprintf(outfile, "\n\nWeights set # %ld:\n\n", ith);
828 printf("\nWeights set # %ld:\n\n", ith);
831 printweights(outfile, 0, chars, weight, "Sites");
834 samenumsp(&chars, ith);
837 for (i = 0; i < chars; i++)
840 inputweights(chars, weight, &weights);
843 printweights(outfile, 0, chars, weight, "Sites");
847 threshold = spp * 3.0;
848 for(i = 0 ; i < (chars) ; i++){
850 threshwt[i] = (long)(threshold * weight[i] + 0.5);
857 void protfillin(node *p, node *left, node *rt)
859 /* sets up for each node in the tree the aa set for site m
860 at that point and counts the changes. The program
861 spends much of its time in this function */
862 boolean counted, done;
865 sitearray ls, rs, qs;
868 for (m = 0; m < chars; m++) {
870 memcpy(ls, left->siteset[m], sizeof(sitearray));
872 memcpy(rs, rt->siteset[m], sizeof(sitearray));
875 memcpy(qs, rs, sizeof(sitearray));
877 else if (rt == NULL) {
878 n = left->numsteps[m];
879 memcpy(qs, ls, sizeof(sitearray));
882 n = left->numsteps[m] + rt->numsteps[m];
883 if ((ls[0] == rs[0]) && (ls[1] == rs[1]) && (ls[2] == rs[2])) {
890 for (i = 0; (!counted) && (i <= 3); i++) {
898 s = (ls[0] & rs[1]) | (ls[1] & rs[0]);
902 s = (ls[0] & rs[2]) | (ls[1] & rs[1]) | (ls[2] & rs[0]);
906 s = ls[0] | (ls[1] & rs[2]) | (ls[2] & rs[1]) | rs[0];
918 qs[1] = qs[0] | (ls[0] & rs[1]) | (ls[1] & rs[0]);
919 qs[2] = qs[1] | (ls[0] & rs[2]) | (ls[1] & rs[1]) | (ls[2] & rs[0]);
922 qs[1] = qs[0] | (ls[0] & rs[2]) | (ls[1] & rs[1]) | (ls[2] & rs[0]);
923 qs[2] = qs[1] | ls[0] | (ls[1] & rs[2]) | (ls[2] & rs[1]) | rs[0];
926 qs[1] = qs[0] | ls[0] | (ls[1] & rs[2]) | (ls[2] & rs[1]) | rs[0];
927 qs[2] = qs[1] | ls[1] | (ls[2] & rs[2]) | rs[1];
930 qs[1] = qs[0] | ls[1] | (ls[2] & rs[2]) | rs[1];
931 qs[2] = qs[1] | ls[2] | rs[2];
934 for (aa = ala; (long)aa <= (long)stop; aa = (aas)((long)aa + 1)) {
936 for (i = 0; (!done) && (i <= 1); i++) {
937 if (((1L << ((long)aa)) & qs[i]) != 0) {
938 for (j = i+1; j <= 2; j++)
939 qs[j] |= translate[(long)aa - (long)ala][j-i];
947 memcpy(p->siteset[m], qs, sizeof(sitearray));
952 void protpreorder(node *p)
954 /* recompute number of steps in preorder taking both ancestoral and
955 descendent steps into account */
956 if (p != NULL && !p->tip) {
957 protfillin (p->next, p->next->next->back, p->back);
958 protfillin (p->next->next, p->back, p->next->back);
959 protpreorder (p->next->back);
960 protpreorder (p->next->next->back);
965 void protadd(node *below, node *newtip, node *newfork)
967 /* inserts the nodes newfork and its left descendant, newtip,
968 to the tree. below becomes newfork's right descendant */
970 if (below != treenode[below->index - 1])
971 below = treenode[below->index - 1];
972 if (below->back != NULL)
973 below->back->back = newfork;
974 newfork->back = below->back;
975 below->back = newfork->next->next;
976 newfork->next->next->back = below;
977 newfork->next->back = newtip;
978 newtip->back = newfork->next;
984 protfillin (newfork, newfork->next->back, newfork->next->next->back);
985 protpreorder(newfork);
987 protpreorder(newfork->back);
992 void protre_move(node **item, node **fork)
994 /* removes nodes item and its ancestor, fork, from the tree.
995 the new descendant of fork's ancestor is made to be
996 fork's second descendant (other than item). Also
997 returns pointers to the deleted nodes, item and fork */
1000 if ((*item)->back == NULL) {
1004 *fork = treenode[(*item)->back->index - 1];
1005 if ((*item) == (*fork)->next->back)
1006 other = (*fork)->next->next->back;
1007 else other = (*fork)->next->back;
1010 p = (*item)->back->next->back;
1011 q = (*item)->back->next->next->back;
1012 if (p != NULL) p->back = q;
1013 if (q != NULL) q->back = p;
1014 (*fork)->back = NULL;
1019 } while (p != (*fork));
1020 (*item)->back = NULL;
1022 protpreorder(other);
1023 if (other != root) protpreorder(other->back);
1028 void evaluate(node *r)
1030 /* determines the number of steps needed for a tree. this is the
1031 minimum number of steps needed to evolve sequences on this tree */
1032 long i, steps, term;
1036 for (i = 0; i < (chars); i++) {
1037 steps = r->numsteps[i];
1038 if (steps <= threshwt[i])
1043 if (usertree && which <= maxuser)
1044 fsteps[which - 1][i] = term;
1046 if (usertree && which <= maxuser) {
1047 nsteps[which - 1] = sum;
1051 } else if (sum < minsteps) {
1060 void protpostorder(node *p)
1062 /* traverses a binary tree, calling PROCEDURE fillin at a
1063 node's descendants before calling fillin at the node */
1066 protpostorder(p->next->back);
1067 protpostorder(p->next->next->back);
1068 protfillin(p, p->next->back, p->next->next->back);
1069 } /* protpostorder */
1072 void protreroot(node *outgroup)
1074 /* reorients tree, putting outgroup in desired position. */
1077 if (outgroup->back->index == root->index)
1080 q = root->next->next;
1081 p->back->back = q->back;
1082 q->back->back = p->back;
1084 q->back = outgroup->back;
1085 outgroup->back->back = q;
1090 void protsavetraverse(node *p, long *pos, boolean *found)
1092 /* sets BOOLEANs that indicate which way is down */
1096 p->next->bottom = false;
1097 protsavetraverse(p->next->back, pos,found);
1098 p->next->next->bottom = false;
1099 protsavetraverse(p->next->next->back, pos,found);
1100 } /* protsavetraverse */
1103 void protsavetree(long *pos, boolean *found)
1105 /* record in place where each species has to be
1106 added to reconstruct this tree */
1111 protreroot(treenode[outgrno - 1]);
1112 protsavetraverse(root, pos,found);
1113 for (i = 0; i < (nonodes); i++)
1115 place[root->index - 1] = 1;
1116 for (i = 1; i <= (spp); i++) {
1117 p = treenode[i - 1];
1118 while (place[p->index - 1] == 0) {
1119 place[p->index - 1] = i;
1125 place[i - 1] = place[p->index - 1];
1126 j = place[p->index - 1];
1129 place[p->index - 1] = spp + i - 1;
1135 done = (place[p->index - 1] != j);
1139 } /* protsavetree */
1142 void tryadd(node *p, node **item, node **nufork)
1144 /* temporarily adds one fork and one tip to the tree.
1145 if the location where they are added yields greater
1146 "likelihood" than other locations tested up to that
1147 time, then keeps that location as there */
1153 protfillin(temp, *item, p);
1155 protfillin(temp1, *item, p);
1156 protfillin(temp, temp1, p->back);
1160 if (like < bestlike) {
1161 if ((*item) == (*nufork)->next->next->back) {
1162 q = (*nufork)->next;
1163 (*nufork)->next = (*nufork)->next->next;
1164 (*nufork)->next->next = q;
1165 q->next = (*nufork);
1168 else if (like >= bstlike2) {
1170 protadd(p, (*item), (*nufork));
1171 rute = root->next->back;
1172 protsavetree(&pos,&found);
1174 if (like > bstlike2) {
1175 bestlike = bstlike2 = like;
1178 addtree(pos, &nextree, dummy, place, bestrees);
1181 findtree(&found, &pos, nextree, place, bestrees);
1183 if (nextree <= maxtrees)
1184 addtree(pos, &nextree, dummy, place, bestrees);
1187 protre_move (item, nufork);
1191 if (like >= bestyet) {
1198 void addpreorder(node *p, node *item, node *nufork)
1200 /* traverses a binary tree, calling PROCEDURE tryadd
1201 at a node before calling tryadd at its descendants */
1205 tryadd(p, &item,&nufork);
1207 addpreorder(p->next->back, item, nufork);
1208 addpreorder(p->next->next->back, item, nufork);
1213 void tryrearr(node *p, boolean *success)
1215 /* evaluates one rearrangement of the tree.
1216 if the new tree has greater "likelihood" than the old
1217 one sets success := TRUE and keeps the new tree.
1218 otherwise, restores the old tree */
1219 node *frombelow, *whereto, *forknode, *q;
1222 if (p->back == NULL)
1224 forknode = treenode[p->back->index - 1];
1225 if (forknode->back == NULL)
1228 if (p->back->next->next == forknode)
1229 frombelow = forknode->next->next->back;
1231 frombelow = forknode->next->back;
1232 whereto = treenode[forknode->back->index - 1];
1233 if (whereto->next->back == forknode)
1234 q = whereto->next->next->back;
1236 q = whereto->next->back;
1237 protfillin(temp1, frombelow, q);
1238 protfillin(temp, temp1, p);
1239 protfillin(temp1, temp, whereto->back);
1241 if (like <= oldlike) {
1242 if (p == forknode->next->next->back) {
1244 forknode->next = forknode->next->next;
1245 forknode->next->next = q;
1251 protre_move(&p, &forknode);
1252 protfillin(whereto, whereto->next->back, whereto->next->next->back);
1254 protadd(whereto, p, forknode);
1261 void repreorder(node *p, boolean *success)
1263 /* traverses a binary tree, calling PROCEDURE tryrearr
1264 at a node before calling tryrearr at its descendants */
1267 tryrearr(p,success);
1269 repreorder(p->next->back,success);
1270 repreorder(p->next->next->back,success);
1275 void rearrange(node **r)
1277 /* traverses the tree (preorder), finding any local
1278 rearrangement which decreases the number of steps.
1279 if traversal succeeds in increasing the tree's
1280 "likelihood", PROCEDURE rearrange runs traversal again */
1281 boolean success = true;
1284 repreorder(*r, &success);
1289 void protgetch(Char *c)
1291 /* get next nonblank character */
1296 if (*c == '\n' || *c == '\t')
1298 } while (!(*c != ' ' || eoff(intree)));
1302 void protaddelement(node **p,long *nextnode,long *lparens,boolean *names)
1304 /* recursive procedure adds nodes to user-defined tree */
1313 if ((*lparens) >= spp - 1) {
1314 printf("\nERROR IN USER TREE: TOO MANY LEFT PARENTHESES\n");
1319 q = treenode[(*nextnode) - 1];
1320 protaddelement(&q->next->back, nextnode,lparens,names);
1321 q->next->back->back = q->next;
1322 findch(',', &ch, which);
1323 protaddelement(&q->next->next->back, nextnode,lparens,names);
1324 q->next->next->back->back = q->next->next;
1325 findch(')', &ch, which);
1329 for (i = 0; i < nmlngth; i++)
1340 } while (ch != ',' && ch != ')' && ch != ':' && n <= nmlngth);
1344 for (i = 0; i < nmlngth; i++)
1345 found = (found && ((str[i] == nayme[n - 1][i]) ||
1346 ((nayme[n - 1][i] == '_') && (str[i] == ' '))));
1348 if (names[n - 1] == false) {
1349 *p = treenode[n - 1];
1350 names[n - 1] = true;
1352 printf("\nERROR IN USER TREE: DUPLICATE NAME FOUND -- ");
1353 for (i = 0; i < nmlngth; i++)
1354 putchar(nayme[n - 1][i]);
1360 } while (!(n > spp || found));
1363 printf("CANNOT FIND SPECIES: ");
1364 for (i = 0; i < nmlngth; i++)
1367 } /* protaddelement */
1372 /* read in user-defined tree and set it up */
1373 long nextnode, lparens, i;
1375 root = treenode[spp];
1378 names = (boolean *)Malloc(spp*sizeof(boolean));
1379 for (i = 0; i < (spp); i++)
1382 protaddelement(&root, &nextnode,&lparens,names);
1389 findch(';', &ch, which);
1392 } /* prottreeread */
1395 void protancestset(long *a, long *b, long *c, long *d, long *k)
1397 /* sets up the aa set array. */
1405 for (i = 0; i <= 5; i++) {
1412 for (j = n; j <= (i - n); j++) {
1417 for (m = n; m <= (i - j - n); m++) {
1427 if (counted || s != 0) {
1434 for (i = 0; i <= 1; i++) {
1435 for (aa = ala; (long)aa <= (long)stop; aa = (aas)((long)aa + 1)) {
1436 if (((1L << ((long)aa)) & d[i]) != 0) {
1437 for (j = i + 1; j <= 2; j++)
1438 d[j] |= translate[(long)aa - (long)ala][j - i];
1442 } /* protancestset */
1445 void prothyprint(long b1, long b2, boolean *bottom, node *r,
1446 boolean *nonzero, boolean *maybe)
1448 /* print out states in sites b1 through b2 at node */
1456 fprintf(outfile, " ");
1458 fprintf(outfile, "root ");
1460 fprintf(outfile, "%3ld ", r->back->index - spp);
1462 for (i = 0; i < nmlngth; i++)
1463 putc(nayme[r->index - 1][i], outfile);
1465 fprintf(outfile, "%4ld ", r->index - spp);
1467 fprintf(outfile, " ");
1469 fprintf(outfile, " yes ");
1471 fprintf(outfile, " maybe ");
1473 fprintf(outfile, " no ");
1474 for (i = b1 - 1; i < b2; i++) {
1590 if (!(*bottom) && dotdiff)
1591 dot = (r->siteset[i] [0] == treenode[r->back->index - 1]->siteset[i][0]
1592 || ((r->siteset[i][0] &
1593 (~((1L << ((long)ser1)) | (1L << ((long)ser2)) |
1594 (1L << ((long)ser))))) == 0 &&
1595 (treenode[r->back->index - 1]->siteset[i] [0] &
1596 (~((1L << ((long)ser1)) | (1L << ((long)ser2)) |
1597 (1L << ((long)ser))))) == 0));
1604 if ((i + 1) % 10 == 0)
1607 putc('\n', outfile);
1611 void prothyptrav(node *r, sitearray *hypset, long b1, long b2, long *k,
1612 boolean *bottom, sitearray nothing)
1614 boolean maybe, nonzero;
1618 gseq *ancset, *temparray;
1621 protgnu(&temparray);
1624 for (i = b1 - 1; i < b2; i++) {
1626 protancestset(hypset[i], r->next->back->siteset[i],
1627 r->next->next->back->siteset[i], temparray->seq[i], k);
1628 memcpy(r->siteset[i], temparray->seq[i], sizeof(sitearray));
1631 anc = treenode[r->back->index - 1]->siteset[i][0];
1633 hset = r->siteset[i][0];
1635 for (aa = ala; (long)aa <= (long)stop; aa = (aas)((long)aa + 1)) {
1636 if (hset == 1L << ((long)aa))
1639 if (hset == ((1L << ((long)asn)) | (1L << ((long)asp))))
1641 if (hset == ((1L << ((long)gln)) | (1L << ((long)gly))))
1643 if (hset == ((1L << ((long)ser1)) | (1L << ((long)ser2))))
1645 if (hset == fullset)
1648 nonzero = (nonzero || (r->siteset[i][0] & anc) == 0);
1649 maybe = (maybe || r->siteset[i][0] != anc);
1651 prothyprint(b1, b2,bottom,r,&nonzero,&maybe);
1654 memcpy(temparray->seq, r->next->back->siteset, chars*sizeof(sitearray));
1655 for (i = b1 - 1; i < b2; i++)
1656 protancestset(hypset[i], r->next->next->back->siteset[i], nothing,
1658 prothyptrav(r->next->back, ancset->seq, b1, b2,k,bottom,nothing );
1659 for (i = b1 - 1; i < b2; i++)
1660 protancestset(hypset[i], temparray->seq[i], nothing, ancset->seq[i],k);
1661 prothyptrav(r->next->next->back, ancset->seq, b1, b2, k,bottom,nothing);
1663 protchuck(temparray);
1668 void prothypstates(long *k)
1670 /* fill in and describe states at interior nodes */
1676 fprintf(outfile, "\nFrom To Any Steps? State at upper node\n");
1677 fprintf(outfile, " ");
1678 fprintf(outfile, "( . means same as in the node below it on tree)\n\n");
1679 memcpy(nothing, translate[(long)quest - (long)ala], sizeof(sitearray));
1680 hypset = (seqptr)Malloc(chars*sizeof(sitearray));
1681 for (i = 0; i < (chars); i++)
1682 memcpy(hypset[i], nothing, sizeof(sitearray));
1684 for (i = 1; i <= ((chars - 1) / 40 + 1); i++) {
1685 putc('\n', outfile);
1690 prothyptrav(root, hypset, i * 40 - 39, n, k,&bottom,nothing);
1693 } /* prothypstates */
1698 /* prints ancestors, steps and table of numbers of steps in
1703 fprintf(outfile, "\nrequires a total of %10.3f\n", like / -10);
1705 putc('\n', outfile);
1707 fprintf(outfile, "weighted ");
1708 fprintf(outfile, "steps in each position:\n");
1709 fprintf(outfile, " ");
1710 for (i = 0; i <= 9; i++)
1711 fprintf(outfile, "%4ld", i);
1712 fprintf(outfile, "\n *-----------------------------------------\n");
1713 for (i = 0; i <= (chars / 10); i++) {
1714 fprintf(outfile, "%5ld", i * 10);
1716 for (j = 0; j <= 9; j++) {
1718 if (k == 0 || k > chars)
1719 fprintf(outfile, " ");
1721 fprintf(outfile, "%4ld", root->numsteps[k - 1] / 10);
1723 putc('\n', outfile);
1728 putc('\n', outfile);
1730 putc('\n', outfile);
1733 treeout(root, nextree, &col, root);
1740 /* constructs a binary tree from the pointers in treenode.
1741 adds each node at location which yields highest "likelihood"
1742 then rearranges the tree for greatest "likelihood" */
1743 long i, j, numtrees;
1745 node *item, *nufork, *dummy;
1748 for (i = 1; i <= (spp); i++)
1749 enterorder[i - 1] = i;
1751 randumize(seed, enterorder);
1752 root = treenode[enterorder[0] - 1];
1754 protadd(treenode[enterorder[0] - 1], treenode[enterorder[1] - 1],
1757 printf("\nAdding species:\n");
1758 writename(0, 2, enterorder);
1761 for (i = 3; i <= (spp); i++) {
1762 bestyet = -30.0*spp*chars;
1764 item = treenode[enterorder[i - 1] - 1];
1765 nufork = treenode[spp + i - 2];
1766 addpreorder(root, item, nufork);
1767 protadd(there, item, nufork);
1771 writename(i - 1, 1, enterorder);
1772 lastrearr = (i == spp);
1775 printf("\nDoing global rearrangements\n");
1777 for (j = 1; j <= nonodes; j++)
1778 if ( j % (( nonodes / 72 ) + 1 ) == 0 )
1784 bstlike2 = bestlike = -30.0*spp*chars;
1791 for (j = 0; j < (nonodes); j++) {
1792 bestyet = -30.0*spp*chars;
1795 nufork = treenode[treenode[j]->back->index - 1];
1796 protre_move(&item, &nufork);
1798 addpreorder(root, item, nufork);
1799 protadd(there, item, nufork);
1802 if ( j % (( nonodes / 72 ) + 1 ) == 0 )
1809 } while (bestlike > gotlike);
1814 for (i = spp - 1; i >= 1; i--)
1815 protre_move(&treenode[i], &dummy);
1816 if (jumb == njumble) {
1818 putc('\n', outfile);
1820 fprintf(outfile, "One most parsimonious tree found:\n");
1822 fprintf(outfile, "%6ld trees in all found\n", nextree - 1);
1824 if (nextree > maxtrees + 1) {
1826 fprintf(outfile, "here are the first%4ld of them\n", (long)maxtrees);
1827 nextree = maxtrees + 1;
1830 putc('\n', outfile);
1832 for (i = 0; i <= (nextree - 2); i++) {
1834 protadd(treenode[0], treenode[1], treenode[spp]);
1835 for (j = 3; j <= (spp); j++)
1836 protadd(treenode[bestrees[i].btree[j - 1] - 1], treenode[j - 1],
1837 treenode[spp + j - 2]);
1838 protreroot(treenode[outgrno - 1]);
1839 protpostorder(root);
1841 printree(root, 1.0);
1843 for (j = 1; j < (spp); j++)
1844 protre_move(&treenode[j], &dummy);
1848 openfile(&intree,INTREE,"input tree file", "r",progname,intreename);
1849 numtrees = countsemic(&intree);
1851 fprintf(outfile, "User-defined tree");
1854 fprintf(outfile, ":\n\n\n\n");
1857 while (which <= numtrees) {
1860 protreroot(treenode[outgrno - 1]);
1861 protpostorder(root);
1863 printree(root, 1.0);
1869 putc('\n', outfile);
1870 if (numtrees > 1 && chars > 1 )
1871 standev(chars, numtrees, minwhich, minsteps, nsteps, fsteps, seed);
1873 if (jumb == njumble && progress) {
1874 printf("Output written to file \"%s\"\n\n", outfilename);
1876 printf("Trees also written onto file \"%s\"\n\n", outtreename);
1881 int main(int argc, Char *argv[])
1882 { /* Protein parsimony by uphill search */
1884 argc = 1; /* macsetup("Protpars",""); */
1885 argv[0] = "Protpars";
1889 openfile(&infile,INFILE,"input file", "r",argv[0],infilename);
1890 openfile(&outfile,OUTFILE,"output file", "w",argv[0],outfilename);
1901 if (weights || justwts)
1902 openfile(&weightfile,WEIGHTFILE,"weights file","r",argv[0],weightfilename);
1904 openfile(&outtree,OUTTREE,"output tree file", "w",argv[0],outtreename);
1905 for (ith = 1; ith <= msets; ith++) {
1909 if (msets > 1 && !justwts) {
1910 fprintf(outfile, "Data set # %ld:\n\n",ith);
1912 printf("Data set # %ld:\n\n",ith);
1914 for (jumb = 1; jumb <= njumble; jumb++)
1921 fixmacfile(outfilename);
1922 fixmacfile(outtreename);
1925 } /* Protein parsimony by uphill search */