5 * Part of TREE-PUZZLE 5.0 (June 2000)
7 * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer,
8 * M. Vingron, and Arndt von Haeseler
9 * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler
11 * All parts of the source except where indicated are distributed under
12 * the GNU public licence. See http://www.opensource.org for details.
20 # define PACKAGE "tree-puzzle"
23 # define VERSION "5.0"
25 #define DATE "October 2000"
43 #ifndef PARALLEL /* because printf() runs significantly faster */
44 /* than fprintf(stdout) on an Apple McIntosh */
46 # define FPRINTF printf
49 # define FPRINTF fprintf
50 # define STDOUTFILE STDOUT,
54 # define FILENAMELENTH 2048
57 # define INFILEDEFAULT "infile"
58 # define OUTFILEDEFAULT "outfile"
59 # define TREEFILEDEFAULT "outtree"
60 # define INTREEDEFAULT "intree"
61 # define DISTANCESDEFAULT "outdist"
62 # define TRIANGLEDEFAULT "outlm.eps"
63 # define UNRESOLVEDDEFAULT "outqlist"
64 # define ALLQUARTDEFAULT "outallquart"
65 # define ALLQUARTLHDEFAULT "outallquartlh"
66 # define OUTPTLISTDEFAULT "outpstep"
67 # define OUTPTORDERDEFAULT "outptorder"
69 # define INFILE infilename
70 # define OUTFILE outfilename
71 # define TREEFILE outtreename
72 # define INTREE intreename
73 # define DISTANCES outdistname
74 # define TRIANGLE outlmname
75 # define UNRESOLVED outqlistname
76 # define ALLQUART outallquartname
77 # define ALLQUARTLH outallquartlhname
78 # define OUTPTLIST outpstepname
79 # define OUTPTORDER outptordername
81 EXTERN char infilename [FILENAMELENTH];
82 EXTERN char outfilename [FILENAMELENTH];
83 EXTERN char outtreename [FILENAMELENTH];
84 EXTERN char intreename [FILENAMELENTH];
85 EXTERN char outdistname [FILENAMELENTH];
86 EXTERN char outlmname [FILENAMELENTH];
87 EXTERN char outqlistname [FILENAMELENTH];
88 EXTERN char outallquartname [FILENAMELENTH];
89 EXTERN char outallquartlhname [FILENAMELENTH];
90 EXTERN char outpstepname [FILENAMELENTH];
91 EXTERN char outptordername [FILENAMELENTH];
93 #define OUTFILEEXT "puzzle"
94 #define TREEFILEEXT "tree"
95 #define DISTANCESEXT "dist"
96 #define TRIANGLEEXT "eps"
97 #define UNRESOLVEDEXT "qlist"
98 #define ALLQUARTEXT "allquart"
99 #define ALLQUARTLHEXT "allquartlh"
100 #define OUTPTLISTEXT "pstep"
101 #define OUTPTORDEREXT "ptorder"
103 #ifndef PARALLEL /* because printf() runs significantly faster */
104 /* than fprintf(stdout) on an Apple McIntosh */
106 # define FPRINTF printf
109 # define FPRINTF fprintf
110 # define STDOUT stdout
111 # define STDOUTFILE STDOUT,
115 /* auto_aamodel/auto_datatype values (xxx) */
118 #define AUTO_DEFAULT 2
121 /* qptlist values (xxx) */
122 #define PSTOUT_NONE 0
123 #define PSTOUT_ORDER 1
124 #define PSTOUT_LISTORDER 2
125 #define PSTOUT_LIST 3
127 /* dtat_optn values (xxx) */
132 /* typ_optn values (xxx) */
133 #define LIKMAPING_OPTN 1
134 #define TREERECON_OPTN 0
136 /* puzzlemodes (xxx) */
141 /* rhetmodes (xxx) Modes of rate heterogeneity */
142 #define UNIFORMRATE 0
147 /* defines for types of quartet likelihood computation (xxx) */
152 typedef struct oneedge {
153 /* pointer to other three edges */
155 struct oneedge *downleft;
156 struct oneedge *downright;
157 int numedge; /* number of edge */
158 uli edgeinfo; /* value of this edge */
159 int *edgemap; /* pointer to the local edgemap */
164 EXTERN cmatrix biparts; /* bipartitions of tree of current puzzling step */
165 EXTERN cmatrix consbiparts; /* bipartitions of majority rule consensus tree */
166 EXTERN cmatrix seqchars; /* characters contained in data set */
167 EXTERN cmatrix treepict; /* picture of consensus tree */
168 EXTERN double minscore; /* value of edgescore on minedge */
169 EXTERN double tstvf84; /* F84 transition/transversion ratio */
170 EXTERN double tstvratio; /* expected transition/transversion ratio */
171 EXTERN double yrtsratio; /* expected pyrimidine/purine transition ratio */
172 EXTERN dvector ulkl; /* log L of user trees */
173 EXTERN dmatrix allsites; /* log L per sites of user trees */
174 EXTERN dvector ulklc; /* log L of user trees (clock) */
175 EXTERN dmatrix allsitesc; /* log L per sites of user trees (clock) */
176 EXTERN FILE *utfp; /* pointer to user tree file */
177 EXTERN FILE *ofp; /* pointer to output file */
178 EXTERN FILE *seqfp; /* pointer to sequence input file */
179 EXTERN FILE *tfp; /* pointer to tree file */
180 EXTERN FILE *dfp; /* pointer to distance file */
181 EXTERN FILE *trifp; /* pointer to triangle file */
182 EXTERN FILE *unresfp; /* pointer to file with unresolved quartets */
183 EXTERN FILE *tmpfp; /* pointer to temporary file */
184 EXTERN FILE *qptlist; /* pointer to file with puzzling step trees */
185 EXTERN FILE *qptorder; /* pointer to file with unique puzzling step trees */
186 EXTERN int SHcodon; /* whether SH should be applied to 1st, 2nd codon positions */
187 EXTERN int utree_optn; /* use first user tree for estimation */
188 EXTERN int listqptrees; /* list puzzling step trees */
189 EXTERN int approxqp; /* approximate QP quartets */
190 EXTERN int *edgeofleaf; /* vector with edge number of all leaves */
191 EXTERN int codon_optn; /* declares what positions in a codon should be used */
192 EXTERN int compclock; /* computation of clocklike branch lengths */
193 EXTERN int chooseA; /* leaf variable */
194 EXTERN int chooseB; /* leaf variable */
195 EXTERN int clustA, clustB, clustC, clustD; /* number of members of LM clusters */
196 EXTERN int column; /* used for breaking lines (writing tree to treefile) */
197 EXTERN int Frequ_optn; /* use empirical base frequencies */
198 EXTERN int Maxbrnch; /* 2*Maxspc - 3 */
199 EXTERN int Maxseqc; /* number of sequence characters per taxum */
200 EXTERN int mflag; /* flag used for correct printing of runtime messages */
201 EXTERN int minedge; /* edge with minimum edgeinfo */
202 EXTERN int nextedge; /* number of edges in the current tree */
203 EXTERN int nextleaf; /* next leaf to add to tree */
204 EXTERN int numclust; /* number of clusters in LM analysis */
205 EXTERN int outgroup; /* outgroup */
206 EXTERN int puzzlemode; /* computation of QP tree and/or ML distances */
207 EXTERN int rootsearch; /* how location of root is found */
208 EXTERN int rhetmode; /* model of rate heterogeneity */
209 EXTERN int splitlength; /* length of one entry in splitpatterns */
210 EXTERN int *splitsizes; /* size of all different splits of all trees */
211 EXTERN int usebestq_optn; /* use only best quartet topology, no bayesian weights */
212 EXTERN int show_optn; /* show unresolved quartets */
213 EXTERN int savequart_optn; /* save memory block which quartets to file */
214 EXTERN int savequartlh_optn; /* save quartet likelihoods to file */
215 EXTERN int saveqlhbin_optn; /* save quartet likelihoods binary */
216 EXTERN int readquart_optn; /* read memory block which quartets from file */
217 EXTERN int sym_optn; /* symmetrize doublet frequencies */
218 EXTERN int xsize; /* depth of consensus tree picture */
219 EXTERN int ytaxcounter; /* counter for establishing y-coordinates of all taxa */
220 EXTERN int numutrees; /* number of users trees in input tree file */
221 EXTERN ivector clusterA, clusterB, clusterC, clusterD; /* clusters for LM analysis */
222 EXTERN ivector consconfid; /* confidence values of majority rule consensus tree */
223 EXTERN ivector conssizes; /* partition sizes of majority rule consensus tree */
224 EXTERN ivector trueID; /* leaf -> taxon on this leaf */
225 EXTERN ivector xcor; /* x-coordinates of consensus tree nodes */
226 EXTERN ivector ycor; /* y-coordinates of consensus tree nodes */
227 EXTERN ivector ycormax; /* maximal y-coordinates of consensus tree nodes */
228 EXTERN ivector ycormin; /* minimal y-coordinates of consensus tree nodes */
229 EXTERN ivector ycortax; /* y-coordinates of all taxa */
230 EXTERN ONEEDGE *edge; /* vector with all the edges of the tree */
231 EXTERN uli *splitcomp; /* bipartition storage */
232 EXTERN uli *splitfreqs; /* frequencies of all different splits of all trees */
233 EXTERN uli *splitpatterns; /* all different splits of all trees */
234 EXTERN uli badqs; /* number of bad quartets */
235 EXTERN uli consincluded; /* number of included biparts in the consensus tree */
236 EXTERN uli Currtrial; /* counter for puzzling steps */
237 EXTERN uli maxbiparts; /* space is reserved for that many bipartitions */
238 EXTERN uli mininfo; /* value of edgeinfo on minedge */
239 EXTERN uli numbiparts; /* number of different bipartitions */
240 EXTERN uli Numquartets; /* number of quartets */
241 EXTERN uli Numtrial; /* number of puzzling steps */
242 EXTERN uli lmqts; /* quartets investigated in LM analysis (0 = ALL) */
244 EXTERN int auto_datatype; /* guess datatype ? */
245 EXTERN int guessdata_optn; /* guessed datatype */
247 EXTERN int auto_aamodel; /* guess amino acid modell ? */
248 EXTERN int guessauto_aamodel; /* guessed amino acid modell ? */
249 EXTERN int guessDayhf_optn; /* guessed Dayhoff model option */
250 EXTERN int guessJtt_optn; /* guessed JTT model option */
251 EXTERN int guessblosum62_optn; /* guessed BLOSUM 62 model option */
252 EXTERN int guessmtrev_optn; /* guessed mtREV model option */
253 EXTERN int guesscprev_optn; /* guessed cpREV model option */
254 EXTERN int guessvtmv_optn; /* guessed VT model option */
255 EXTERN int guesswag_optn; /* guessed WAG model option */
257 /* counter variables needed in likelihood mapping analysis */
258 EXTERN uli ar1, ar2, ar3;
259 EXTERN uli reg1, reg2, reg3, reg4, reg5, reg6, reg7;
260 EXTERN uli reg1l, reg1r, reg2u, reg2d, reg3u, reg3d,
261 reg4u, reg4d, reg5l, reg5r, reg6u, reg6d;
262 EXTERN unsigned char *quartetinfo; /* place where quartets are stored */
263 EXTERN dvector qweight; /* for use in QP and LM analysis */
264 EXTERN dvector sqdiff;
265 EXTERN ivector qworder;
266 EXTERN ivector sqorder;
269 EXTERN int psteptreestrlen;
271 typedef struct treelistitemtypedummy {
272 struct treelistitemtypedummy *pred;
273 struct treelistitemtypedummy *succ;
274 struct treelistitemtypedummy *sortnext;
275 struct treelistitemtypedummy *sortlast;
282 EXTERN treelistitemtype *psteptreelist;
283 EXTERN treelistitemtype *psteptreesortlist;
284 EXTERN int psteptreenum;
285 EXTERN int psteptreesum;
289 void makeF84model(void);
290 void compnumqts(void);
291 void setoptions(void);
292 void openfiletoread(FILE **, char[], char[]);
293 void openfiletowrite(FILE **, char[], char[]);
294 void openfiletoappend(FILE **, char[], char[]);
295 void closefile(FILE *);
296 void symdoublets(void);
297 void computeexpectations(void);
298 void putdistance(FILE *);
299 void findidenticals(FILE *);
300 double averagedist(void);
302 void plotlmpoint(FILE *, double, double);
303 void finishps(FILE *);
304 void makelmpoint(FILE *, double, double, double);
305 void printtreestats(FILE *);
306 void timestamp(FILE *);
307 void writeoutputfile(FILE *, int);
309 /* definitions for writing output */
311 #define WRITEPARAMS 1
314 void writetimesstat(FILE *ofp);
315 void writecutree(FILE *, int);
316 void starttimer(void);
317 void checktimer(uli);
318 void estimateparametersnotree(void);
319 void estimateparameterstree(void);
320 int main(int, char *[]);
321 int ulicmp(const void *, const void *);
322 int intcmp(const void *, const void *);
324 void readid(FILE *, int);
325 char readnextcharacter(FILE *, int, int);
326 void skiprestofline(FILE *, int, int);
327 void skipcntrl(FILE *, int, int);
328 void getseqs(FILE *);
330 void fputid10(FILE *, int);
331 int fputid(FILE *, int);
332 void getsizesites(FILE *);
333 void getdataset(FILE *);
334 int guessdatatype(void);
335 void translatedataset(void);
336 void estimatebasefreqs(void);
337 void guessmodel(void);
339 void addnextleaf(int);
341 void writeOTU(FILE *, int);
342 void writetree(FILE *);
344 void copytree(int *ctree);
345 void freectree(int **snodes);
346 void printctree(int *ctree);
347 char *sprintfctree(int *ctree, int strlen);
348 void fprintffullpstree(FILE *outf, char *treestr);
349 int printfsortctree(int *ctree);
350 int sortctree(int *ctree);
351 int ct_1stedge(int node);
352 int ct_2ndedge(int node);
353 int ct_3rdedge(int node);
355 void printfpstrees(treelistitemtype *list);
356 void printfsortedpstrees(treelistitemtype *list);
357 void fprintfsortedpstrees(FILE *output, treelistitemtype *list, int itemnum, int itemsum, int comment, float cutoff);
359 void sortbynum(treelistitemtype *list, treelistitemtype **sortlist);
360 treelistitemtype *addtree2list(char **tree,
362 treelistitemtype **list,
365 void freetreelist(treelistitemtype **list,
368 void resetedgeinfo(void);
369 void incrementedgeinfo(int, int);
370 void minimumedgeinfo(void);
371 void initconsensus(void);
372 void makepart(int, int);
373 void computebiparts(void);
374 void printsplit(FILE *, uli);
375 void makenewsplitentries(void);
376 void copysplit(uli, int);
377 void makeconsensus(void);
378 void writenode(FILE *, int);
379 void writeconsensustree(FILE *);
380 void nodecoordinates(int);
381 void drawnode(int, int);
382 void plotconsensustree(FILE *);
383 unsigned char *mallocquartets(int);
384 void freequartets(void);
385 unsigned char readquartet(int, int, int, int);
386 void writequartet(int, int, int, int, unsigned char);
387 void sort3doubles(dvector, ivector);
388 void computeallquartets(void);
389 void checkquartet(int, int, int, int);
390 void num2quart(uli qnum, int *a, int *b, int *c, int *d);
391 uli numquarts(int maxspc);
392 uli quart2num (int a, int b, int c, int d);
394 void writetpqfheader(int nspec, FILE *ofp, int flag);
397 /* extracted from main (xxx) */
398 void compute_quartlklhds(int a, int b, int c, int d, double *d1, double *d2, double *d3, int approx);
401 /* definitions for timing */
415 clock_t tempcpustart;
418 time_t temptimestart;
428 double mincputicktime;
436 double quartblockcpu;
454 double quartblocktime;
458 double puzzblocktime;
462 double treeblocktime;
469 EXTERN double cputime, walltime;
470 EXTERN double fullcpu, fulltime;
471 EXTERN double fullcputime, fullwalltime;
472 EXTERN double altcputime, altwalltime;
473 EXTERN clock_t cputimestart, cputimestop, cputimedummy;
474 EXTERN time_t walltimestart, walltimestop, walltimedummy;
475 EXTERN clock_t Startcpu; /* start cpu time */
476 EXTERN clock_t Stopcpu; /* stop cpu time */
477 EXTERN time_t Starttime; /* start time */
478 EXTERN time_t Stoptime; /* stop time */
479 EXTERN time_t time0; /* timer variable */
480 EXTERN time_t time1; /* yet another timer */
481 EXTERN time_t time2; /* yet another timer */
482 EXTERN timearray_t tarr;
484 void resetqblocktime(timearray_t *ta);
485 void resetpblocktime(timearray_t *ta);
486 void inittimearr(timearray_t *ta);
487 void addtimes(int jobtype, timearray_t *ta);
489 void printtimearr(timearray_t *ta);
490 #endif /* TIMEDEBUG */
492 #endif /* _PUZZLE_ */