5 * Part of TREE-PUZZLE 5.0 (June 2000)
7 * (c) 1999-2000 by Heiko A. Schmidt, Korbinian Strimmer,
8 * M. Vingron, and Arndt von Haeseler
9 * (c) 1995-1999 by Korbinian Strimmer and Arndt von Haeseler
11 * All parts of the source except where indicated are distributed under
12 * the GNU public licence. See http://www.opensource.org for details.
21 #define MINTS 0.20 /* Ts/Tv parameter */
23 #define MINYR 0.10 /* Y/R Ts parameter */
25 #define MINFI 0.00 /* fraction invariable sites */
26 #define MAXFI 0.99 /* only for input */
27 #define MINGE 0.01 /* rate heterogeneity parameter */
29 #define MINCAT 4 /* discrete Gamma categories */
32 #define RMHROOT 5.0 /* upper relative bound for height of root */
33 #define MAXARC 900.0 /* upper limit on branch length (PAM) = 6.0 */
34 #define MINARC 0.001 /* lower limit on branch length (PAM) = 0.00001 */
35 #define EPSILON 0.0001 /* error in branch length (PAM) = 0.000001 */
36 #define HEPSILON 0.0001 /* error in node and root heights */
37 #define MAXIT 100 /* maximum number of iterates of smoothing */
38 #define MINFDIFF 0.00002 /* lower limit on base frequency differences */
39 #define MINFREQ 0.0001 /* lower limit on base frequencies = 0.01% */
40 #define NUMQBRNCH 5 /* number of branches in a quartet */
41 #define NUMQIBRNCH 1 /* number of internal branches in a quartet */
42 #define NUMQSPC 4 /* number of sequences in a quartet */
45 #define PEPS1 0.01 /* epsilon substitution process estimation */
46 #define PEPS2 0.01 /* epsilon rate heterogeneity estimation */
54 /* type definitions */
69 dcube partials; /* partial likelihoods */
70 char *label; /* internal labels */
76 Node **ebrnchp; /* list of pointers to external branches */
77 Node **ibrnchp; /* list of pointers to internal branches */
78 double lklhd; /* total log-likelihood */
79 double lklhdc; /* total log-likelihood clock */
80 dmatrix condlkl; /* likelihoods for each pattern and non-zero rate */
85 /* global variables */
87 EXTERN Node *chep; /* pointer to current height node */
88 EXTERN Node *rootbr; /* pointer to root branch */
89 EXTERN Node **heights; /* pointer to height nodes in unrooted tree */
90 EXTERN int Numhts; /* number of height nodes in unrooted tree */
91 EXTERN double hroot; /* height of root */
92 EXTERN double varhroot; /* variance of height of root */
93 EXTERN double maxhroot; /* maximal height of root */
94 EXTERN int locroot; /* location of root */
95 EXTERN int numbestroot; /* number of best locations for root */
96 EXTERN int clockmode; /* clocklike vs. nonclocklike computation */
97 EXTERN cmatrix Identif; /* sequence names */
98 EXTERN cmatrix Seqchar; /* ML sequence data */
99 EXTERN cmatrix Seqpat; /* ordered site patterns */
100 EXTERN ivector constpat; /* indicates constant site patterns */
101 EXTERN cvector seqchi;
102 EXTERN cvector seqchj;
103 EXTERN dcube partiali;
104 EXTERN dcube partialj;
105 EXTERN dcube ltprobr; /* transition probabilites (for all non-zero rates */
106 EXTERN dmatrix Distanmat; /* matrix with maximum likelihood distances */
107 EXTERN dmatrix Evec; /* Eigenvectors */
108 EXTERN dmatrix Ievc; /* Inverse eigenvectors */
109 EXTERN double TSparam; /* Ts/Tv parameter */
110 EXTERN double tsmean, yrmean;
111 EXTERN double YRparam; /* Y/R Ts parameter */
112 EXTERN double geerr; /* estimated error of rate heterogeneity */
113 EXTERN double Geta; /* rate heterogeneity parameter */
114 EXTERN double fracconst; /* fraction of constant sites */
115 EXTERN double fracconstpat;/* fraction of constant patterns */
116 EXTERN double Proportion; /* for tree drawing */
117 EXTERN double tserr; /* estimated error of TSparam */
118 EXTERN double yrerr; /* estimated error of YRparam */
119 EXTERN double fracinv; /* fraction of invariable sites */
120 EXTERN double fierr; /* estimated error of fracinv */
121 EXTERN dvector Brnlength;
122 EXTERN dvector Distanvec;
123 EXTERN dvector Eval; /* Eigenvalues of 1 PAM rate matrix */
124 EXTERN dvector Freqtpm; /* base frequencies */
125 EXTERN dvector Rates; /* rate of each of the categories */
127 EXTERN imatrix Basecomp; /* base composition of each taxon */
128 EXTERN ivector usedtaxa; /* list needed in the input treefile procedure */
129 EXTERN int numtc; /* auxiliary variable for printing rooted tree */
130 EXTERN int qcalg_optn; /* use quartet subsampling algorithm */
131 EXTERN int approxp_optn; /* approximate parameter estimation */
132 EXTERN int chi2fail; /* flag for chi2 test */
133 EXTERN int Converg; /* flag for ML convergence (no clock) */
134 EXTERN int Convergc; /* flag for ML convergence (clock) */
135 EXTERN int data_optn; /* type of sequence input data */
136 EXTERN int Dayhf_optn; /* Dayhoff model */
137 EXTERN int HKY_optn; /* use HKY model */
138 EXTERN int Jtt_optn; /* JTT model */
139 EXTERN int blosum62_optn; /* BLOSUM 62 model */
140 EXTERN int mtrev_optn; /* mtREV model */
141 EXTERN int cprev_optn; /* cpREV model */
142 EXTERN int vtmv_optn; /* VT model */
143 EXTERN int wag_optn; /* WAG model */
144 EXTERN int Maxsite; /* number of ML characters per taxum */
145 EXTERN int Maxspc; /* number of sequences */
146 EXTERN int mlmode; /* quartet ML or user defined tree ML */
147 EXTERN int nuc_optn; /* nucleotide (4x4) models */
148 EXTERN int Numbrnch; /* number of branches of current tree */
149 EXTERN int numcats; /* number of rate categories */
150 EXTERN int Numconst; /* number of constant sites */
151 EXTERN int Numconstpat; /* number of constant patterns */
152 EXTERN int Numibrnch; /* number of internal branches of current tree */
153 EXTERN int Numitc; /* number of ML iterations assumning clock */
154 EXTERN int Numit; /* number of ML iterations if there is convergence */
155 EXTERN int Numptrn; /* number of site patterns */
156 EXTERN int Numspc; /* number of sequences of current tree */
157 EXTERN int optim_optn; /* optimize model parameters */
158 EXTERN int grate_optim; /* optimize Gamma rate heterogeneity parameter */
159 EXTERN int SH_optn; /* SH nucleotide (16x16) model */
160 EXTERN int TN_optn; /* use TN model */
161 EXTERN int tpmradix; /* number of different states */
162 EXTERN int fracinv_optim; /* optimize fraction of invariable sites */
163 EXTERN int typ_optn; /* type of PUZZLE analysis */
164 EXTERN ivector Weight; /* weight of each site pattern */
165 EXTERN Tree *Ctree; /* pointer to current tree */
166 EXTERN ulivector badtaxon; /* involment of each taxon in a bad quartet */
167 EXTERN int qca, qcb, qcc, qcd; /* quartet currently optimized */
168 EXTERN ivector Alias; /* link site -> corresponding site pattern */
169 EXTERN ivector bestrate; /* optimal assignment of rates to sequence sites */
171 EXTERN int bestratefound;
173 /* function prototypes of all ml function */
175 void convfreq(dvector);
176 void radixsort(cmatrix, ivector, int, int, int *);
177 void condenceseq(cmatrix, ivector, cmatrix, ivector, int, int, int);
178 void countconstantsites(cmatrix, ivector, int, int, int *, int*);
179 void evaluateseqs(void);
180 void elmhes(dmatrix, ivector, int);
181 void eltran(dmatrix, dmatrix, ivector, int);
182 void mcdiv(double, double, double, double, double *, double *);
183 void hqr2(int, int, int, dmatrix, dmatrix, dvector, dvector);
184 void onepamratematrix(dmatrix);
185 void eigensystem(dvector, dmatrix);
186 void luinverse(dmatrix, dmatrix, int);
187 void checkevector(dmatrix, dmatrix, int);
188 void tranprobmat(void);
189 void tprobmtrx(double, dmatrix);
190 double comptotloglkl(dmatrix);
191 void allsitelkl(dmatrix, dvector);
192 double pairlkl(double);
193 double mldistance(int, int);
194 void initdistan(void);
195 void computedistan(void);
196 void productpartials(Node *);
197 void partialsinternal(Node *);
198 void partialsexternal(Node *);
199 void initpartials(Tree *);
200 double intlkl(double);
201 void optinternalbranch(Node *);
202 double extlkl(double);
203 void optexternalbranch(Node *);
204 void finishlkl(Node *);
205 double optlkl(Tree *);
206 double treelkl(Tree *);
207 void luequation(dmatrix, dvector, int);
208 void lslength(Tree *, dvector, int, int, dvector);
210 void getusertree(FILE *, cvector, int);
211 Node *internalnode(Tree *, char **, int *);
212 void constructtree(Tree *, cvector);
213 void removebasalbif(cvector);
214 void makeusertree(FILE *);
215 Tree *new_tree(int, int, cmatrix);
216 Tree *new_quartet(int, cmatrix);
217 void free_tree(Tree *, int);
218 void make_quartet(int, int, int, int);
219 void changedistan(dmatrix, dvector, int);
220 double quartet_lklhd(int, int, int, int);
221 double quartet_alklhd(int, int, int, int);
222 void readusertree(FILE *);
223 double usertree_lklhd(void);
224 double usertree_alklhd(void);
226 void distupdate(int, int, int, int);
228 void prbranch(Node *, int, int, int, ivector, ivector, FILE *);
229 void getproportion(double *, dvector, int);
230 void prtopology(FILE *);
231 void fputphylogeny(FILE *);
232 void resulttree(FILE *);
234 void njdistantree(Tree *);
235 void findbestratecombination(void);
236 void printbestratecombination(FILE *);
238 void fputsubstree(FILE *, Node *);
239 void fputrooted(FILE *, int);
240 void findheights(Node *);
242 double clock_alklhd(int);
243 double heightlkl(double);
244 void optheight(void);
245 double rheightlkl(double);
246 void optrheight(void);
247 double clock_lklhd(int);
248 int findrootedge(void);
249 void resultheights(FILE *);
251 double homogentest(int);
252 void YangDiscreteGamma(double, int, double *);
253 void updaterates(void);
254 void computestat(double *, int, double *, double *);
255 double quartetml(int, int, int, int);
256 double opttsq(double);
257 double optyrq(double);
258 void optimseqevolparamsq(void);
259 double opttst(double);
260 double optyrt(double);
261 void optimseqevolparamst(void);
262 double optfi(double);
263 double optge(double);
264 void optimrateparams(void);
266 int gettpmradix(void);
267 void rtfdata(dmatrix, double *);
268 int code2int(cvector);
271 void jttdata(dmatrix, double *);
272 void dyhfdata(dmatrix, double *);
273 void mtrevdata(dmatrix, double *);
274 void cprev45data(dmatrix, double *);
275 void blosum62data(dmatrix, double *);
276 void vtmvdata(dmatrix, double *);
277 void wagdata(dmatrix, double *);