5 /* version 3.6. (c) Copyright 1986-2004 by the University of Washington
6 and by Joseph Felsenstein. Written by Joseph Felsenstein and Lucas Mix.
7 Permission is granted to copy and use this program provided no fee is
8 charged for it and provided that this copyright notice is not removed. */
10 #define epsilon 0.0001 /* used in makenewv, getthree, update */
13 typedef long vall[maxcategs];
14 typedef double contribarr[maxcategs];
17 /* function prototypes */
18 void init_protmats(void);
19 void getoptions(void);
20 void makeprotfreqs(void);
23 void inputoptions(void);
24 void input_protdata(long);
25 void makeweights(void);
26 void prot_makevalues(long, pointarray, long, long, sequence, steptr);
29 void prot_inittable(void);
30 void alloc_pmatrix(long);
31 void make_pmatrix(double **, double **, double **, long, double, double,
33 void prot_nuview(node *);
34 void getthree(node *p, double thigh, double tlow);
35 void makenewv(node *);
38 void promlk_add(node *, node *, node *, boolean);
39 void promlk_re_move(node **, node **, boolean);
41 double prot_evaluate(node *);
42 void tryadd(node *, node **, node **);
43 void addpreorder(node *, node *, node *, boolean, boolean);
44 void restoradd(node *, node *, node *, double);
45 void tryrearr(node *, boolean *);
46 void repreorder(node *, boolean *);
47 void rearrange(node **);
48 void nodeinit(node *);
50 void travinit(node *);
53 void treevaluate(void);
54 void promlk_coordinates(node *, long *);
55 void promlk_drawline(long, double);
56 void promlk_printree(void);
57 void describe(node *);
58 void prot_reconstr(node *, long);
59 void rectrav(node *, long, long);
61 void promlk_treeout(node *);
62 void initpromlnode(node **, node **, node *, long, long, long *, long *,
63 initops, pointarray, pointarray, Char *, Char *, FILE *);
64 void tymetrav(node *, double *);
66 void free_all_protx(long, pointarray);
69 void reallocsites(void);
70 void prot_freetable(void);
71 void free_pmatrix(long sib);
72 /* function prototypes */
78 Char infilename[100], outfilename[100], intreename[100], outtreename[100],
79 catfilename[100], weightfilename[100];
81 long sites, weightsum, categs, datasets, ith, njumble, jumb, numtrees, shimotrees;
82 /* sites = number of sites in actual sequences
83 numtrees = number of user-defined trees */
84 long inseed, inseed0, mx, mx0, mx1;
85 boolean global, jumble, lngths, trout, usertree, weights, rctgry, ctgry,
86 auto_, progress, mulsets, firstset, hypstate, smoothit,
87 polishing, justwts, gama, invar, usejtt, usepmb, usepam;
88 tree curtree, bestree, bestree2;
90 double sumrates, cv, alpha, lambda, lambda1, invarfrac;
96 contribarr *contribution;
97 char aachar[26]="ARNDCQEGHILKMFPSTWYVBZX?*-";
99 long rcategs, nonodes2;
102 /* Local variables for maketree, propagated globally for C version: */
103 long k, maxwhich, col;
104 double like, bestyet, tdelta, lnlike, slope, curv, maxlogl;
105 boolean lastsp, smoothed, succeeded;
108 double expon1i[maxcategs], expon1v[maxcategs],
109 expon2i[maxcategs], expon2v[maxcategs];
116 /* Variables introduced to allow for protein probability calculations */
117 long max_num_sibs; /* maximum number of siblings used in a */
118 /* nuview calculation. determines size */
119 /* final size of pmatrices */
120 double *eigmat; /* eig matrix variable */
121 double **probmat; /* prob matrix variable */
122 double ****dpmatrix; /* derivative of pmatrix */
123 double ****ddpmatrix; /* derivative of xpmatrix */
124 double *****pmatrices; /* matrix of probabilities of protien */
125 /* conversion. The 5 subscripts refer */
126 /* to sibs, rcategs, categs, final and */
127 /* initial states, respectively. */
128 double freqaa[20]; /* amino acid frequencies */
130 /* this jtt matrix decomposition due to Elisabeth Tillier */
131 static double jtteigmat[] =
132 {0.0, -0.007031123, -0.006484345, -0.006086499, -0.005514432,
133 -0.00772664, -0.008643413, -0.010620756, -0.009965552, -0.011671808,
134 -0.012222418,-0.004589201, -0.013103714, -0.014048038, -0.003170582,
135 -0.00347935, -0.015311677, -0.016021194, -0.017991454, -0.018911888};
137 static double jttprobmat[20][20] =
138 {{0.076999996, 0.051000003, 0.043000004, 0.051999998, 0.019999996, 0.041,
139 0.061999994, 0.073999997, 0.022999999, 0.052000004, 0.090999997, 0.058999988,
140 0.024000007, 0.04, 0.050999992, 0.069, 0.059000006, 0.014000008, 0.032000004,
142 {0.015604455, -0.068062363, 0.020106264, 0.070723273, 0.011702977, 0.009674053,
143 0.074000798, -0.169750458, 0.005560808, -0.008208636, -0.012305869,
144 -0.063730179, -0.005674643, -0.02116828, 0.104586169, 0.016480839, 0.016765139,
145 0.005936994, 0.006046367, -0.0082877},
146 {-0.049778281, -0.007118197, 0.003801272, 0.070749616, 0.047506147,
147 0.006447017, 0.090522425, -0.053620432, -0.008508175, 0.037170603,
148 0.051805545, 0.015413608, 0.019939916, -0.008431976, -0.143511376,
149 -0.052486072, -0.032116542, -0.000860626, -0.02535993, 0.03843545},
150 {-0.028906423, 0.092952047, -0.009615343, -0.067870117, 0.031970392,
151 0.048338335, -0.054396304, -0.135916654, 0.017780083, 0.000129242,
152 0.031267424, 0.116333586, 0.007499746, -0.032153596, 0.033517051,
153 -0.013719269, -0.00347293, -0.003291821, -0.02158326, -0.008862168},
154 {0.037181176, -0.023106564, -0.004482225, -0.029899635, 0.118139633,
155 -0.032298569, -0.04683198, 0.05566988, -0.012622847, 0.002023096,
156 -0.043921088, -0.04792557, -0.003452711, -0.037744513, 0.020822974,
157 0.036580187, 0.02331425, -0.004807711, -0.017504496, 0.01086673},
158 {0.044754061, -0.002503471, 0.019452517, -0.015611487, -0.02152807,
159 -0.013131425, -0.03465365, -0.047928912, 0.020608851, 0.067843095,
160 -0.122130014, 0.002521499, 0.013021646, -0.082891087, -0.061590119,
161 0.016270856, 0.051468938, 0.002079063, 0.081019713, 0.082927944},
162 {0.058917882, 0.007320741, 0.025278141, 0.000357541, -0.002831285,
163 -0.032453034, -0.010177288, -0.069447924, -0.034467324, 0.011422358,
164 -0.128478324, 0.04309667, -0.015319944, 0.113302422, -0.035052393,
165 0.046885372, 0.06185183, 0.00175743, -0.06224497, 0.020282093},
166 {-0.014562092, 0.022522921, -0.007094389, 0.03480089, -0.000326144,
167 -0.124039037, 0.020577906, -0.005056454, -0.081841576, -0.004381786,
168 0.030826152, 0.091261631, 0.008878828, -0.02829487, 0.042718836,
169 -0.011180886, -0.012719227, -0.000753926, 0.048062375, -0.009399129},
170 {0.033789571, -0.013512235, 0.088010984, 0.017580292, -0.006608005,
171 -0.037836971, -0.061344686, -0.034268357, 0.018190209, -0.068484614,
172 0.120024744, -0.00319321, -0.001349477, -0.03000546, -0.073063759,
173 0.081912399, 0.0635245, 0.000197, -0.002481798, -0.09108114},
174 {-0.113947615, 0.019230545, 0.088819683, 0.064832765, 0.001801467,
175 -0.063829682, -0.072001633, 0.018429333, 0.057465965, 0.043901014,
176 -0.048050874, -0.001705918, 0.022637173, 0.017404665, 0.043877902,
177 -0.017089594, -0.058489485, 0.000127498, -0.029357194, 0.025943972},
178 {0.01512923, 0.023603725, 0.006681954, 0.012360216, -0.000181447,
179 -0.023011838, -0.008960024, -0.008533239, 0.012569835, 0.03216118,
180 0.061986403, -0.001919083, -0.1400832, -0.010669741, -0.003919454,
181 -0.003707024, -0.026806029, -0.000611603, -0.001402648, 0.065312824},
182 {-0.036405351, 0.020816769, 0.011408213, 0.019787053, 0.038897829,
183 0.017641789, 0.020858533, -0.006067252, 0.028617353, -0.064259496,
184 -0.081676567, 0.024421823, -0.028751676, 0.07095096, -0.024199434,
185 -0.007513119, -0.028108766, -0.01198095, 0.111761119, -0.076198809},
186 {0.060831772, 0.144097327, -0.069151377, 0.023754576, -0.003322955,
187 -0.071618574, 0.03353154, -0.02795295, 0.039519769, -0.023453968,
188 -0.000630308, -0.098024591, 0.017672997, 0.003813378, -0.009266499,
189 -0.011192111, 0.016013873, -0.002072968, -0.010022044, -0.012526904},
190 {-0.050776604, 0.092833081, 0.044069596, 0.050523021, -0.002628417,
191 0.076542572, -0.06388631, -0.00854892, -0.084725311, 0.017401063,
192 -0.006262541, -0.094457679, -0.002818678, -0.0044122, -0.002883973,
193 0.028729685, -0.004961596, -0.001498627, 0.017994575, -0.000232779},
194 {-0.01894566, -0.007760205, -0.015160993, -0.027254587, 0.009800903,
195 -0.013443561, -0.032896517, -0.022734138, -0.001983861, 0.00256111,
196 0.024823166, -0.021256768, 0.001980052, 0.028136263, -0.012364384,
197 -0.013782446, -0.013061091, 0.111173981, 0.021702122, 0.00046654},
198 {-0.009444193, -0.042106824, -0.02535015, -0.055125574, 0.006369612,
199 -0.02945416, -0.069922064, -0.067221068, -0.003004999, 0.053624311,
200 0.128862984, -0.057245803, 0.025550508, 0.087741073, -0.001119043,
201 -0.012036202, -0.000913488, -0.034864475, 0.050124813, 0.055534723},
202 {0.145782464, -0.024348311, -0.031216873, 0.106174443, 0.00202862,
203 0.02653866, -0.113657267, -0.00755018, 0.000307232, -0.051241158,
204 0.001310685, 0.035275877, 0.013308898, 0.002957626, -0.002925034,
205 -0.065362319, -0.071844582, 0.000475894, -0.000112419, 0.034097762},
206 {0.079840455, 0.018769331, 0.078685899, -0.084329807, -0.00277264,
207 -0.010099754, 0.059700608, -0.019209715, -0.010442992, -0.042100476,
208 -0.006020556, -0.023061786, 0.017246106, -0.001572858, -0.006703785,
209 0.056301316, -0.156787357, -0.000303638, 0.001498195, 0.051363455},
210 {0.049628261, 0.016475144, 0.094141653, -0.04444633, 0.005206131,
211 -0.001827555, 0.02195624, 0.013066683, -0.010415582, -0.022338403,
212 0.007837197, -0.023397671, -0.002507095, 0.005177694, 0.017109561,
213 -0.202340113, 0.069681441, 0.000120736, 0.002201146, 0.004670849},
214 {0.089153689, 0.000233354, 0.010826822, -0.004273519, 0.001440618,
215 0.000436077, 0.001182351, -0.002255508, -0.000700465, 0.150589876,
216 -0.003911914, -0.00050154, -0.004564983, 0.00012701, -0.001486973,
217 -0.018902754, -0.054748555, 0.000217377, -0.000319302, -0.162541651}};
220 static double pameigmat[] = {0.0, -0.002350753691875762, -0.002701991863800379,
221 -0.002931612442853115, -0.004262492032364507, -0.005395980482561625,
222 -0.007141172690079523, -0.007392844756151318, -0.007781761342200766,
223 -0.00810032066366362, -0.00875299712761124, -0.01048227332164386,
224 -0.01109594097332267, -0.01298616073142234, -0.01342036228188581,
225 -0.01552599145527578, -0.01658762802054814, -0.0174893445623765,
226 -0.01933280832903272, -0.02206353522613025};
228 static double pamprobmat[20][20] =
229 {{0.087683339901135, 0.04051291829598762, 0.04087846315185977,
230 0.04771603459744777, 0.03247095396561266, 0.03784612688594957,
231 0.0504933695604875, 0.0898249006830755, 0.03285885059543713,
232 0.0357514442352119, 0.0852464099207521, 0.07910313444070642,
233 0.01488243946396588, 0.04100101908956829, 0.05158026947089499,
234 0.06975497205982451, 0.05832757042475474, 0.00931264523877807,
235 0.03171540880870517, 0.06303972920984541},
236 {0.01943453646811026, -0.004492574160484092, 0.007694891061220776,
237 0.01278399096887701, 0.0106157418450234, 0.007542140341575122,
238 0.01326994069032819, 0.02615565199894889, 0.003123125764490066,
239 0.002204507682495444, -0.004782898215768979, 0.01204241965177619,
240 0.0007847400096924341, -0.03043626073172116, 0.01221202591902536,
241 0.01100527004684405, 0.01116495631339549, -0.0925364931988571,
242 -0.02622065387931562, 0.00843494142432107},
243 {0.01855357100209072, 0.01493642835763868, 0.0127983090766285,
244 0.0200533250704364, -0.1681898360107787, 0.01551657969909255,
245 0.02128060163107209, 0.03100633591848964, 0.00845480845269879,
246 0.000927149370785571, 0.00937207565817036, 0.03490557769673472,
247 0.00300443019551563, -0.02590837220264415, 0.01329376859943192,
248 0.006854110889741407, 0.01102593860528263, 0.003360844186685888,
249 -0.03459712356647764, 0.003351477369404443},
250 {0.02690642688200102, 0.02131745801890152, 0.0143626616005213,
251 0.02405101425725929, 0.05041008641436849, 0.01430925051050233,
252 0.02362114036816964, 0.04688381789373886, 0.005250115453626377,
253 -0.02040112168595516, -0.0942720776915669, 0.03773004996758644,
254 -0.00822831940782616, -0.1164872809439224, 0.02286281877257392,
255 0.02849551240669926, 0.01468856796295663, 0.02377110964207936,
256 -0.094380545436577, -0.02089068498518036},
257 {0.00930172577225213, 0.01493463068441099, 0.020186920775608,
258 0.02892154953912524, -0.01224593358361567, 0.01404228329986624,
259 0.02671186617119041, 0.04537535161795231, 0.02229995804098249,
260 -0.04635704133961575, -0.1966910360247138, 0.02796648065439046,
261 -0.02263484732621436, 0.0440490503242072, 0.01148782948302166,
262 0.01989170531824069, 0.001306805142981245, -0.005676690969116321,
263 0.07680476281625202, -0.07967537039721849},
264 {0.06602274245435476, -0.0966661981471856, -0.005241648783844579,
265 0.00859135188171146, -0.007762129660943368, -0.02888965572526196,
266 0.003592291525888222, 0.1668410669287673, -0.04082039290551406,
267 0.005233775047553415, -0.01758244726137135, -0.1493955762326898,
268 -0.00855819137835548, 0.004211419253492328, 0.01929306335052688,
269 0.03008056746359405, 0.0190444422412472, 0.005577189741419315,
270 0.0000874156155112068, 0.02634091459108298},
271 {0.01933897472880726, 0.05874583569377844, -0.02293534606228405,
272 -0.07206314017962175, -0.004580681581546643, -0.0628814337610561,
273 -0.0850783812795136, 0.07988417636610614, -0.0852798990133397,
274 0.01649047166155952, -0.05416647263757423, 0.1089834536254064,
275 0.005093403979413865, 0.02520300254161142, 0.0005951431406455604,
276 0.02441251821224675, 0.02796099482240553, -0.002574933994926502,
277 -0.007172237553012804, 0.03002455129086954},
278 {0.04041118479094272, -0.002476225672095412, -0.01494505811263243,
279 -0.03759443758599911, -0.00892246902492875, -0.003634714029239211,
280 -0.03085671837973749, -0.126176309029931, 0.005814031139083794,
281 0.01313561962646063, -0.04760487162503322, -0.0490563712725484,
282 -0.005082243450421558, -0.01213634309383557, 0.1806666927079249,
283 0.02111663336185495, 0.02963486860587087, -0.0000175020101657785,
284 0.01197155383597686, 0.0357526792184636},
285 {-0.01184769557720525, 0.01582776076338872, -0.006570708266564639,
286 -0.01471915653734024, 0.00894343616503608, 0.00562664968033149,
287 -0.01465878888356943, 0.05365282692645818, 0.00893509735776116,
288 -0.05879312944436473, 0.0806048683392995, -0.007722897986905326,
289 -0.001819943882718859, 0.0942535573077267, 0.07483883782251654,
290 0.004354639673913651, -0.02828804845740341, -0.001318222184691827,
291 -0.07613149604246563, -0.1251675867732172},
292 {0.00834167031558193, -0.01509357596974962, 0.007098172811092488,
293 0.03127677418040319, 0.001992448468465455, 0.00915441566808454,
294 0.03430175973499201, -0.0730648147535803, -0.001402707145575659,
295 0.04780949194330815, -0.1115035603461273, -0.01292297197609604,
296 -0.005056270550868528, 0.1112053349612027, -0.03801929822379964,
297 -0.001191241001736563, 0.01872874622910247, 0.0005314214903865993,
298 -0.0882576318311789, 0.07607183599610171},
299 {-0.01539460099727769, 0.04988596184297883, -0.01187240760647617,
300 -0.06987843637091853, -0.002490472846497859, 0.01009857892494956,
301 -0.07473588067847209, 0.0906009925879084, 0.1243612446505172,
302 0.02152806401345371, -0.03504879644860233, -0.06680752427613573,
303 -0.005574485153629651, 0.001518282948127752, -0.01999168507510701,
304 -0.01478606199529457, -0.02203749419458996, -0.00132680708294333,
305 -0.01137505997867614, 0.05332658773667142},
306 {-0.06104378736432388, 0.0869446603393548, -0.03298331234537257,
307 0.03128515657456024, 0.003906358569208259, 0.03578694104193928,
308 0.06241936133189683, 0.06182827284921748, -0.05566564263245907,
309 0.02640868588189002, -0.01349751243059039, -0.05507866642582638,
310 -0.006671347738489326, -0.001470096466016046, 0.05185743641479938,
311 -0.07494697511168257, -0.1175185439057584, -0.001188074094105709,
312 0.00937934805737347, 0.05024773745437657},
313 {-0.07252555582124737, -0.116554459356382, 0.003605361887406413,
314 -0.00836518656029184, 0.004615715410745561, 0.005105376617651312,
315 -0.00944938657024391, 0.05602449420950007, 0.02722719610561933,
316 0.01959357494748446, -0.0258655103753962, 0.1440733975689835,
317 0.01446782819722976, 0.003718896062070054, 0.05825843045655135,
318 -0.06230154142733073, -0.07833704962300169, 0.003160836143568724,
319 -0.001169873777936648, 0.03471745590503304},
320 {-0.03204352258752698, 0.01019272923862322, 0.04509668708733181,
321 0.05756522429120813, -0.0004601149081726732, -0.0984718150777423,
322 -0.01107826100664925, -0.005680277810520585, 0.01962359392320817,
323 0.01550006899131986, 0.05143956925922197, 0.02462476682588468,
324 -0.0888843861002653, -0.00171553583659411, 0.01606331750661664,
325 0.001176847743518958, -0.02070972978912828, -0.000341523293579971,
326 -0.002654732745607882, 0.02075709428885848},
327 {0.03595199666430258, -0.02800219615234468, -0.04341570015493925,
328 -0.0748275906176658, 0.0001051403676377422, 0.1137431321746627,
329 0.005852087565974318, 0.003443037513847801, -0.02481931657706633,
330 -0.003651181839831423, 0.03195794176786321, 0.04135411406392523,
331 -0.07562030263210619, 0.001769332364699, -0.01984381173403915,
332 -0.005029750745010152, 0.02649253902476472, 0.000518085571702734,
333 0.001062936684474851, 0.01295950668914449},
334 {-0.16164552322896, -0.0006050035060464324, 0.0258380054414968,
335 0.003188424740960557, -0.0002058911341821877, 0.03157555987384681,
336 -0.01678913462596107, 0.03096216145389774, -0.0133791110666919,
337 0.1125249625204277, -0.00769017706442472, -0.02653938062180483,
338 -0.002555329863523985, -0.00861833362947954, 0.01775148884754278,
339 0.02529310679774722, 0.0826243417011238, -0.0001036728183032624,
340 0.001963562313294209, -0.0935900561309786},
341 {0.1652394174588469, -0.002814245280784351, -0.0328982001821263,
342 -0.02000104712964131, 0.0002208121995725443, -0.02733462178511839,
343 0.02648078162927627, -0.01788316626401427, 0.01630747623755998,
344 0.1053849023838147, -0.005447706553811218, 0.01810876922536839,
345 -0.001808914710282444, -0.007687912115607397, -0.01332593672114388,
346 -0.02110750894891371, -0.07456116592983384, 0.000219072589592394,
347 0.001270886972191055, -0.1083616930749109},
348 {0.02453279389716254, -0.005820072356487439, 0.100260287284095,
349 0.01277522280305745, -0.003184943445296999, 0.05814689527984152,
350 -0.0934012278200201, -0.03017986487349484, -0.03136625380994165,
351 0.00988668352785117, -0.00358900410973142, -0.02017443675004764,
352 0.000915384582922184, -0.001460963415183106, -0.01370112443251124,
353 0.1130040979284457, -0.1196161771323699, -0.0005800211204222045,
354 -0.0006153403201024954, 0.00416806428223025},
355 {-0.0778089244252535, -0.007055161182430869, -0.0349307504860869,
356 -0.0811915584276571, -0.004689825871599125, -0.03726108871471753,
357 0.1072225647141469, -0.00917015113070944, 0.01381628985996913,
358 -0.00123227881492089, 0.001815954515275675, 0.005708744099349901,
359 -0.0001448985044877925, -0.001306578795561384, -0.006992743514185243,
360 0.1744720240732789, -0.05353628497814023, -0.0007613684227234787,
361 -0.0003550282315997644, 0.01340106423804634},
362 {-0.0159527329868513, -0.007622151568160798, -0.1389875105184963,
363 0.1165051999914764, -0.002217810389087748, 0.01550003226513692,
364 -0.07427664222230566, -0.003371438498619264, 0.01385754771325365,
365 0.004759020167383304, 0.001624078805220564, 0.02011638303109029,
366 -0.001717827082842178, -0.0007424036708598594, -0.003978884451898934,
367 0.0866418927301209, -0.01280817739158123, -0.00023039242454603,
368 0.002309205802479111, 0.0005926106991001195}};
370 /* this pmb matrix decomposition due to Elisabeth Tillier */
371 static double pmbeigmat[20] =
372 {0.0000001586972220,-1.8416770496147100, -1.6025046986139100,-1.5801012515121300,
373 -1.4987794099715900,-1.3520794233801900,-1.3003469390479700,-1.2439503327631300,
374 -1.1962574080244200,-1.1383730501367500,-1.1153278910708000,-0.4934843510654760,
375 -0.5419014550215590,-0.9657997830826700,-0.6276075673757390,-0.6675927795018510,
376 -0.6932641383465870,-0.8897872681859630,-0.8382698977371710,-0.8074694642446040};
378 static double pmbprobmat[20][20] =
379 {{0.0771762457248147,0.0531913844998640,0.0393445076407294,0.0466756566755510,
380 0.0286348361997465,0.0312327748383639,0.0505410248721427,0.0767106611472993,
381 0.0258916271688597,0.0673140562194124,0.0965705469252199,0.0515979465932174,
382 0.0250628079438675,0.0503492018628350,0.0399908189418273,0.0641898881894471,
383 0.0517539616710987,0.0143507440546115,0.0357994592438322,0.0736218495862984},
384 {0.0368263046116572,-0.0006728917107827,0.0008590805287740,-0.0002764255356960,
385 0.0020152937187455,0.0055743720652960,0.0003213317669367,0.0000449190281568,
386 -0.0004226254397134,0.1805040629634510,-0.0272246813586204,0.0005904606533477,
387 -0.0183743200073889,-0.0009194625608688,0.0008173657533167,-0.0262629806302238,
388 0.0265738757209787,0.0002176606241904,0.0021315644838566,-0.1823229927207580},
389 {-0.0194800075560895,0.0012068088610652,-0.0008803318319596,-0.0016044273960017,
390 -0.0002938633803197,-0.0535796754602196,0.0155163896648621,-0.0015006360762140,
391 0.0021601372013703,0.0268513218744797,-0.1085292493742730,0.0149753083138452,
392 0.1346457366717310,-0.0009371698759829,0.0013501708044116,0.0346352293103622,
393 -0.0276963770242276,0.0003643142783940,0.0002074817333067,-0.0174108903914110},
394 {0.0557839400850153,0.0023271577185437,0.0183481103396687,0.0023339480096311,
395 0.0002013267015151,-0.0227406863569852,0.0098644845475047,0.0064721276774396,
396 0.0001389408104210,-0.0473713878768274,-0.0086984445005797,0.0026913674934634,
397 0.0283724052562196,0.0001063665179457,0.0027442574779383,-0.1875312134708470,
398 0.1279864877057640,0.0005103347834563,0.0003155113168637,0.0081451082759554},
399 {0.0037510125027265,0.0107095920636885,0.0147305410328404,-0.0112351252180332,
400 -0.0001500408626446,-0.1523450933729730,0.0611532413339872,-0.0005496748939503,
401 0.0048714378736644,-0.0003826320053999,0.0552010244407311,0.0482555671001955,
402 -0.0461664995115847,-0.0021165008617978,-0.0004574454232187,0.0233755883688949,
403 -0.0035484915422384,0.0009090698422851,0.0013840637687758,-0.0073895139302231},
404 {-0.0111512564930024,0.1025460064723080,0.0396772456883791,-0.0298408501361294,
405 -0.0001656742634733,-0.0079876311843289,0.0712644184507945,-0.0010780604625230,
406 -0.0035880882043592,0.0021070399334252,0.0016716329894279,-0.1810123023850110,
407 0.0015141703608724,-0.0032700852781804,0.0035503782441679,0.0118634302028026,
408 0.0044561606458028,-0.0001576678495964,0.0023470722225751,-0.0027457045397157},
409 {0.1474525743949170,-0.0054432538500293,0.0853848892349828,-0.0137787746207348,
410 -0.0008274830358513,0.0042248844582553,0.0019556229305563,-0.0164191435175148,
411 -0.0024501858854849,0.0120908948084233,-0.0381456105972653,0.0101271614855119,
412 -0.0061945941321859,0.0178841099895867,-0.0014577779202600,-0.0752120602555032,
413 -0.1426985695849920,0.0002862275078983,-0.0081191734261838,0.0313401149422531},
414 {0.0542034611735289,-0.0078763926211829,0.0060433542506096,0.0033396210615510,
415 0.0013965072374079,0.0067798903832256,-0.0135291136622509,-0.0089982442731848,
416 -0.0056744537593887,-0.0766524225176246,0.1881210263933930,-0.0065875518675173,
417 0.0416627569300375,-0.0953804133524747,-0.0012559228448735,0.0101622644292547,
418 -0.0304742453119050,0.0011702318499737,0.0454733434783982,-0.1119239362388150},
419 {0.1069409037912470,0.0805064400880297,-0.1127352030714600,0.1001181253523260,
420 -0.0021480427488769,-0.0332884841459003,-0.0679837575848452,-0.0043812841356657,
421 0.0153418716846395,-0.0079441315103188,-0.0121766182046363,-0.0381127991037620,
422 -0.0036338726532673,0.0195324059593791,-0.0020165963699984,-0.0061222685010268,
423 -0.0253761448771437,-0.0005246410999057,-0.0112205170502433,0.0052248485517237},
424 {-0.0325247648326262,0.0238753651653669,0.0203684886605797,0.0295666232678825,
425 -0.0003946714764213,-0.0157242718469554,-0.0511737848084862,0.0084725632040180,
426 -0.0167068828528921,0.0686962159427527,-0.0659702890616198,-0.0014289912494271,
427 -0.0167000964093416,-0.1276689083678200,0.0036575057830967,-0.0205958145531018,
428 0.0000368919612829,0.0014413626622426,0.1064360941926030,0.0863372661517408},
429 {-0.0463777468104402,0.0394712148670596,0.1118686750747160,0.0440711686389031,
430 -0.0026076286506751,-0.0268454015202516,-0.1464943067133240,-0.0137514051835380,
431 -0.0094395514284145,-0.0144124844774228,0.0249103379323744,-0.0071832157138676,
432 0.0035592787728526,0.0415627419826693,0.0027040097365669,0.0337523666612066,
433 0.0316121324137152,-0.0011350177559026,-0.0349998884574440,-0.0302651879823361},
434 {0.0142360925194728,0.0413145623127025,0.0324976427846929,0.0580930922002398,
435 -0.0586974207121084,0.0202001168873069,0.0492204086749069,0.1126593173463060,
436 0.0116620013776662,-0.0780333711712066,-0.1109786767320410,0.0407775100936731,
437 -0.0205013161312652,-0.0653458585025237,0.0347351829703865,0.0304448983224773,
438 0.0068813748197884,-0.0189002309261882,-0.0334507528405279,-0.0668143558699485},
439 {-0.0131548829657936,0.0044244322828034,-0.0050639951827271,-0.0038668197633889,
440 -0.1536822386530220,0.0026336969165336,0.0021585651200470,-0.0459233839062969,
441 0.0046854727140565,0.0393815434593599,0.0619554007991097,0.0027456299925622,
442 0.0117574347936383,0.0373018612990383,0.0024818527553328,-0.0133956606027299,
443 -0.0020457128424105,0.0154178819990401,0.0246524142683911,0.0275363065682921},
444 {-0.1542307272455030,0.0364861558267547,-0.0090880407008181,0.0531673937889863,
445 0.0157585615170580,0.0029986538457297,0.0180194047699875,0.0652152443589317,
446 0.0266842840376180,0.0388457366405908,0.0856237634510719,0.0126955778952183,
447 0.0099593861698250,-0.0013941794862563,0.0294065511237513,-0.1151906949298290,
448 -0.0852991447389655,0.0028699120202636,-0.0332087026659522,0.0006811857297899},
449 {0.0281300736924501,-0.0584072081898638,-0.0178386569847853,-0.0536470338171487,
450 -0.0186881656029960,-0.0240008730656106,-0.0541064820498883,0.2217137098936020,
451 -0.0260500001542033,0.0234505236798375,0.0311127151218573,-0.0494139126682672,
452 0.0057093465049849,0.0124937286655911,-0.0298322975915689,0.0006520211333102,
453 -0.0061018680727128,-0.0007081999479528,-0.0060523759094034,0.0215845995364623},
454 {0.0295321046399105,-0.0088296411830544,-0.0065057049917325,-0.0053478115612781,
455 -0.0100646496794634,-0.0015473619084872,0.0008539960632865,-0.0376381933046211,
456 -0.0328135588935604,0.0672161874239480,0.0667626853916552,-0.0026511651464901,
457 0.0140451514222062,-0.0544836996133137,0.0427485157912094,0.0097455780205802,
458 0.0177309072915667,-0.0828759701187452,-0.0729504795471370,0.0670731961252313},
459 {0.0082646581043963,-0.0319918630534466,-0.0188454445200422,-0.0374976353856606,
460 0.0037131290686848,-0.0132507796987883,-0.0306958830735725,-0.0044119395527308,
461 -0.0140786756619672,-0.0180512599925078,-0.0208243802903953,-0.0232202769398931,
462 -0.0063135878270273,0.0110442171178168,0.1824538048228460,-0.0006644614422758,
463 -0.0069909097436659,0.0255407650654681,0.0099119399501151,-0.0140911517070698},
464 {0.0261344441524861,-0.0714454044548650,0.0159436926233439,0.0028462736216688,
465 -0.0044572637889080,-0.0089474834434532,-0.0177570282144517,-0.0153693244094452,
466 0.1160919467206400,0.0304911481385036,0.0047047513411774,-0.0456535116423972,
467 0.0004491494948617,-0.0767108879444462,-0.0012688533741441,0.0192445965934123,
468 0.0202321954782039,0.0281039933233607,-0.0590403018490048,0.0364080426546883},
469 {0.0115826306265004,0.1340228176509380,-0.0236200652949049,-0.1284484655137340,
470 -0.0004742338006503,0.0127617346949511,-0.0428560878860394,0.0060030732454125,
471 0.0089182609926781,0.0085353834972860,0.0048464809638033,0.0709740071429510,
472 0.0029940462557054,-0.0483434904493132,-0.0071713680727884,-0.0036840391887209,
473 0.0031454003250096,0.0246243550241551,-0.0449551277644180,0.0111449232769393},
474 {0.0140356721886765,-0.0196518236826680,0.0030517022326582,0.0582672093364850,
475 -0.0000973895685457,0.0021704767224292,0.0341806268602705,-0.0152035987563018,
476 -0.0903198657739177,0.0259623214586925,0.0155832497882743,-0.0040543568451651,
477 0.0036477631918247,-0.0532892744763217,-0.0142569373662724,0.0104500681408622,
478 0.0103483945857315,0.0679534422398752,-0.0768068882938636,0.0280289727046158}}
486 eigmat = (double *) Malloc (20 * sizeof(double));
487 for (l = 0; l <= 19; l++)
489 eigmat[l] = jtteigmat[l]*100.0;
492 eigmat[l] = pmbeigmat[l];
494 eigmat[l] = pameigmat[l]*100.0;
496 probmat = (double **) Malloc (20 * sizeof(double *));
497 for (l = 0; l < 20; l++)
498 probmat[l] = (double *) Malloc (20 * sizeof(double));
499 for (l = 0; l <= 19; l++)
500 for (m= 0; m <= 19; m++)
502 probmat[l][m] = jttprobmat[l][m];
505 probmat[l][m] = pmbprobmat[l][m];
507 probmat[l][m] = pamprobmat[l][m];
509 } /* init_protmats */
514 /* interactively set options */
515 long i, loopcount, loopcount2;
518 boolean didchangecat, didchangercat;
521 fprintf(outfile, "\nAmino acid sequence\n");
522 fprintf(outfile, " Maximum Likelihood method with molecular ");
523 fprintf(outfile, "clock, version %s\n\n", VERSION);
528 didchangecat = false;
530 didchangercat = false;
555 printf("\nAmino acid sequence\n");
556 printf(" Maximum Likelihood method with molecular clock, version %s\n\n",
558 printf("Settings for this run:\n");
559 printf(" U Search for best tree?");
561 printf(" No, use user trees in input file\n");
564 printf(" P JTT, PMB or PAM probability model? %s\n",
565 usejtt ? "Jones-Taylor-Thornton" :
566 usepmb ? "Henikoff/Tillier PMB" : "Dayhoff PAM");
568 printf(" L Use lengths from user tree?");
574 printf(" C One category of substitution rates?");
578 printf(" %ld categories\n", categs);
579 printf(" R Rate variation among sites?");
581 printf(" constant rate of change\n");
584 printf(" Gamma distributed rates\n");
587 printf(" Gamma+Invariant sites\n");
589 printf(" user-defined HMM of rates\n");
591 printf(" A Rates at adjacent sites correlated?");
593 printf(" No, they are independent\n");
595 printf(" Yes, mean block length =%6.1f\n", 1.0 / lambda);
598 printf(" G Global rearrangements?");
604 printf(" W Sites weighted? %s\n",
605 (weights ? "Yes" : "No"));
607 printf(" J Randomize input order of sequences?");
609 printf(" Yes (seed = %8ld, %3ld times)\n", inseed0, njumble);
611 printf(" No. Use input order\n");
613 printf(" M Analyze multiple data sets?");
615 printf(" Yes, %2ld %s\n", datasets,
616 (justwts ? "sets of weights" : "data sets"));
619 printf(" I Input sequences interleaved?");
623 printf(" No, sequential\n");
624 printf(" 0 Terminal type (IBM PC, ANSI, none)?");
629 if (!(ibmpc || ansi))
631 printf(" 1 Print out the data at start of run");
636 printf(" 2 Print indications of progress of run");
641 printf(" 3 Print out tree");
646 printf(" 4 Write out trees onto tree file?");
651 printf(" 5 Reconstruct hypothetical sequences? %s\n",
652 (hypstate ? "Yes" : "No"));
653 printf("\nAre these settings correct? ");
654 printf("(type Y or the letter for one to change)\n");
656 phyFillScreenColor();
658 scanf("%c%*[^\n]", &ch);
666 if (strchr("UPCRJAFWGLTMI012345", ch) != NULL){
672 printf("\nSitewise user-assigned categories:\n\n");
677 rate = (double *) Malloc( categs * sizeof(double));
679 initcategs(categs, rate);
719 lambda1 = 1.0 - lambda;
734 initjumble(&inseed, &inseed0, seed, &njumble);
743 usertree = !usertree;
749 printf("Multiple data sets or multiple weights?");
752 printf(" (type D or W)\n");
754 phyFillScreenColor();
756 scanf("%c%*[^\n]", &ch2);
761 countup(&loopcount2, 10);
762 } while ((ch2 != 'W') && (ch2 != 'D'));
763 justwts = (ch2 == 'W');
765 justweights(&datasets);
767 initdatasets(&datasets);
770 initjumble(&inseed, &inseed0, seed, &njumble);
776 interleaved = !interleaved;
780 initterminal(&ibmpc, &ansi);
784 printdata = !printdata;
788 progress = !progress;
792 treeprint = !treeprint;
800 hypstate = !hypstate;
804 printf("Not a possible option!\n");
806 countup(&loopcount, 100);
812 "\nCoefficient of variation of substitution rate among sites (must be positive)\n");
814 " In gamma distribution parameters, this is 1/(square root of alpha)\n");
816 phyFillScreenColor();
818 scanf("%lf%*[^\n]", &cv);
820 countup(&loopcount, 10);
822 alpha = 1.0 / (cv * cv);
827 printf("\nRates in HMM");
829 printf(" (including one for invariant sites)");
836 probcat = (double *) Malloc(rcategs * sizeof(double));
837 rrate = (double *) Malloc(rcategs * sizeof(double));
838 didchangercat = true;
840 initgammacat(rcategs, alpha, rrate, probcat);
845 printf("Fraction of invariant sites?\n");
846 scanf("%lf%*[^\n]", &invarfrac);
848 countup (&loopcount, 10);
849 } while ((invarfrac <= 0.0) || (invarfrac >= 1.0));
850 initgammacat(rcategs-1, alpha, rrate, probcat);
851 for (i = 0; i < rcategs-1; i++)
852 probcat[i] = probcat[i]*(1.0-invarfrac);
853 probcat[rcategs-1] = invarfrac;
854 rrate[rcategs-1] = 0.0;
856 initcategs(rcategs, rrate);
857 initprobcat(rcategs, &probsum, probcat);
862 rrate = Malloc( rcategs*sizeof(double));
863 probcat = Malloc( rcategs*sizeof(double));
868 rate = Malloc( categs*sizeof(double));
877 /* calculate amino acid frequencies based on eigmat */
881 for (i = 0; i <= 19; i++)
882 if (fabs(eigmat[i]) < fabs(eigmat[mineig]))
884 memcpy(freqaa, probmat[mineig], 20 * sizeof(double));
885 for (i = 0; i <= 19; i++)
886 freqaa[i] = fabs(freqaa[i]);
887 } /* makeprotfreqs */
892 for (i = 0; i < spp; i++)
893 y[i] = (char *)Malloc(sites * sizeof(char));
894 enterorder = (long *)Malloc(spp*sizeof(long));
895 weight = (long *)Malloc(sites*sizeof(long));
896 category = (long *)Malloc(sites*sizeof(long));
897 alias = (long *)Malloc(sites*sizeof(long));
898 aliasweight = (long *)Malloc(sites*sizeof(long));
899 ally = (long *)Malloc(sites*sizeof(long));
900 location = (long *)Malloc(sites*sizeof(long));
901 for (i = 0; i < sites; i++)
903 for (i = 0; i < sites; i++)
912 y = (Char **)Malloc(spp*sizeof(Char *));
913 nayme = (naym *)Malloc(spp*sizeof(naym));
914 for (i = 0; i < spp; i++)
915 y[i] = (char *)Malloc(sites * sizeof(char));
916 enterorder = (long *)Malloc(spp*sizeof(long));
917 weight = (long *)Malloc(sites*sizeof(long));
918 category = (long *)Malloc(sites*sizeof(long));
919 alias = (long *)Malloc(sites*sizeof(long));
920 aliasweight = (long *)Malloc(sites*sizeof(long));
921 ally = (long *)Malloc(sites*sizeof(long));
922 location = (long *)Malloc(sites*sizeof(long));
928 /* initializes variables */
930 inputnumbers(&spp, &sites, &nonodes, 1);
934 fprintf(outfile, "%2ld species, %3ld sites\n", spp, sites);
935 alloctree(&curtree.nodep, nonodes, usertree);
939 alloctree(&bestree.nodep, nonodes, 0);
942 alloctree(&bestree2.nodep, nonodes, 0);
951 samenumsp(&sites, ith);
955 for (i = 0; i < sites; i++)
957 for (i = 0; i < sites; i++)
960 if (justwts || weights)
961 inputweights(sites, weight, &weights);
963 for (i = 0; i < sites; i++)
964 weightsum += weight[i];
965 if ((ctgry && categs > 1) && (firstset || !justwts)) {
966 inputcategs(0, sites, category, categs, "ProMLK");
968 printcategs(outfile, sites, category, "Site categories");
970 if (weights && printdata)
971 printweights(outfile, 0, sites, weight, "Sites");
972 fprintf(outfile, "%s model of amino acid change\n\n",
973 (usejtt ? "Jones-Taylor-Thornton" :
974 usepmb ? "Henikoff/Tillier PMB" : "Dayhoff PAM"));
978 void input_protdata(long chars)
980 /* input the names and sequences for each species */
982 long i, j, k, l, basesread, basesnew;
984 boolean allread, done;
987 headings(chars, "Sequences", "---------");
992 /* eat white space -- if the separator line has spaces on it*/
994 charstate = gettc(infile);
995 } while (charstate == ' ' || charstate == '\t');
996 ungetc(charstate, infile);
1001 if ((interleaved && basesread == 0) || !interleaved)
1003 j = (interleaved) ? basesread : 0;
1005 while (!done && !eoff(infile)) {
1008 while (j < chars && !(eoln(infile) || eoff(infile))) {
1009 charstate = gettc(infile);
1010 if (charstate == '\n' || charstate == '\t')
1012 if (charstate == ' ' || (charstate >= '0' && charstate <= '9'))
1014 uppercase(&charstate);
1015 if ((strchr("ABCDEFGHIKLMNPQRSTVWXYZ*?-", charstate)) == NULL){
1016 printf("ERROR: bad amino acid: %c at position %ld of species %ld\n",
1018 if (charstate == '.') {
1019 printf(" Periods (.) may not be used as gap characters.\n");
1020 printf(" The correct gap character is (-)\n");
1025 y[i - 1][j - 1] = charstate;
1031 else if (j == chars)
1034 if (interleaved && i == 1)
1039 if ((interleaved && j != basesnew) ||
1040 (!interleaved && j != chars)) {
1041 printf("ERROR: SEQUENCES OUT OF ALIGNMENT AT POSITION %ld.\n", j);
1048 basesread = basesnew;
1049 allread = (basesread == chars);
1051 allread = (i > spp);
1055 for (i = 1; i <= ((chars - 1) / 60 + 1); i++) {
1056 for (j = 1; j <= spp; j++) {
1057 for (k = 0; k < nmlngth; k++)
1058 putc(nayme[j - 1][k], outfile);
1059 fprintf(outfile, " ");
1063 for (k = (i - 1) * 60 + 1; k <= l; k++) {
1064 if (j > 1 && y[j - 1][k - 1] == y[0][k - 1])
1067 charstate = y[j - 1][k - 1];
1068 putc(charstate, outfile);
1069 if (k % 10 == 0 && k % 60 != 0)
1072 putc('\n', outfile);
1074 putc('\n', outfile);
1076 putc('\n', outfile);
1077 } /* input_protdata */
1082 /* make up weights vector to avoid duplicate computations */
1085 for (i = 1; i <= sites; i++) {
1088 aliasweight[i - 1] = weight[i - 1];
1089 location[i - 1] = 0;
1091 sitesort2(sites, aliasweight);
1092 sitecombine2(sites, aliasweight);
1093 sitescrunch2(sites, 1, 2, aliasweight);
1094 for (i = 1; i <= sites; i++) {
1095 if (aliasweight[i - 1] > 0)
1098 for (i = 1; i <= endsite; i++) {
1099 ally[alias[i - 1] - 1] = alias[i - 1];
1100 location[alias[i - 1] - 1] = i;
1102 contribution = (contribarr *) Malloc( endsite*sizeof(contribarr));
1106 void prot_makevalues(long categs, pointarray treenode, long endsite,
1107 long spp, sequence y, steptr alias)
1109 /* set up fractional likelihoods at tips */
1110 /* a version of makevalues2 found in seq.c */
1115 for (k = 0; k < endsite; k++) {
1117 for (i = 0; i < spp; i++) {
1118 for (l = 0; l < categs; l++) {
1119 memset(treenode[i]->protx[k][l], 0, sizeof(double)*20);
1120 switch (y[i][j - 1]) {
1123 treenode[i]->protx[k][l][0] = 1.0;
1127 treenode[i]->protx[k][l][(long)arginine - (long)alanine] = 1.0;
1131 treenode[i]->protx[k][l][(long)asparagine - (long)alanine] = 1.0;
1135 treenode[i]->protx[k][l][(long)aspartic - (long)alanine] = 1.0;
1139 treenode[i]->protx[k][l][(long)cysteine - (long)alanine] = 1.0;
1143 treenode[i]->protx[k][l][(long)glutamine - (long)alanine] = 1.0;
1147 treenode[i]->protx[k][l][(long)glutamic - (long)alanine] = 1.0;
1151 treenode[i]->protx[k][l][(long)glycine - (long)alanine] = 1.0;
1155 treenode[i]->protx[k][l][(long)histidine - (long)alanine] = 1.0;
1159 treenode[i]->protx[k][l][(long)isoleucine - (long)alanine] = 1.0;
1163 treenode[i]->protx[k][l][(long)leucine - (long)alanine] = 1.0;
1167 treenode[i]->protx[k][l][(long)lysine - (long)alanine] = 1.0;
1171 treenode[i]->protx[k][l][(long)methionine - (long)alanine] = 1.0;
1175 treenode[i]->protx[k][l][(long)phenylalanine - (long)alanine] = 1.0;
1179 treenode[i]->protx[k][l][(long)proline - (long)alanine] = 1.0;
1183 treenode[i]->protx[k][l][(long)serine - (long)alanine] = 1.0;
1187 treenode[i]->protx[k][l][(long)threonine - (long)alanine] = 1.0;
1191 treenode[i]->protx[k][l][(long)tryptophan - (long)alanine] = 1.0;
1195 treenode[i]->protx[k][l][(long)tyrosine - (long)alanine] = 1.0;
1199 treenode[i]->protx[k][l][(long)valine - (long)alanine] = 1.0;
1203 treenode[i]->protx[k][l][(long)asparagine - (long)alanine] = 1.0;
1204 treenode[i]->protx[k][l][(long)aspartic - (long)alanine] = 1.0;
1208 treenode[i]->protx[k][l][(long)glutamine - (long)alanine] = 1.0;
1209 treenode[i]->protx[k][l][(long)glutamic - (long)alanine] = 1.0;
1212 case 'X': /* unknown aa */
1213 for (b = 0; b <= 19; b++)
1214 treenode[i]->protx[k][l][b] = 1.0;
1217 case '?': /* unknown aa */
1218 for (b = 0; b <= 19; b++)
1219 treenode[i]->protx[k][l][b] = 1.0;
1222 case '*': /* stop codon symbol */
1223 for (b = 0; b <= 19; b++)
1224 treenode[i]->protx[k][l][b] = 1.0;
1227 case '-': /* deletion event-absent data or aa */
1228 for (b = 0; b <= 19; b++)
1229 treenode[i]->protx[k][l][b] = 1.0;
1235 } /* prot_makevalues */
1242 /* reads the input data */
1243 if (!justwts || firstset)
1245 if (!justwts || firstset)
1246 input_protdata(sites);
1248 setuptree2(curtree);
1250 setuptree2(bestree);
1252 setuptree2(bestree2);
1254 grcategs = (categs > rcategs) ? categs : rcategs;
1255 prot_allocx(nonodes, grcategs, curtree.nodep, usertree);
1257 prot_allocx(nonodes, grcategs, bestree.nodep, 0);
1259 prot_allocx(nonodes, grcategs, bestree2.nodep, 0);
1261 prot_makevalues(rcategs, curtree.nodep, endsite, spp, y, alias);
1264 void prot_freetable(void)
1267 for (j = 0; j < rcategs; j++) {
1268 for (k = 0; k < categs; k++) {
1269 for (l = 0; l < 20; l++)
1270 free(ddpmatrix[j][k][l]);
1271 free(ddpmatrix[j][k]);
1277 for (j = 0; j < rcategs; j++) {
1278 for (k = 0; k < categs; k++) {
1279 for (l = 0; l < 20; l++)
1280 free(dpmatrix[j][k][l]);
1281 free(dpmatrix[j][k]);
1288 for (j = 0; j < rcategs; j++)
1292 for ( i = 0 ; i < max_num_sibs ; i++ )
1297 void prot_inittable()
1299 /* Define a lookup table. Precompute values and print them out in tables */
1300 /* Allocate memory for the pmatrices, dpmatices and ddpmatrices */
1304 /* Allocate memory for pmatrices, the array of pointers to pmatrices */
1306 pmatrices = (double *****) Malloc (spp * sizeof(double ****));
1308 /* Allocate memory for the first 2 pmatrices, the matrix of conversion */
1309 /* probabilities, but only once per run (aka not on the second jumble. */
1314 /* Allocate memory for one dpmatrix, the first derivative matrix */
1316 dpmatrix = (double ****) Malloc( rcategs * sizeof(double ***));
1317 for (j = 0; j < rcategs; j++) {
1318 dpmatrix[j] = (double ***) Malloc( categs * sizeof(double **));
1319 for (k = 0; k < categs; k++) {
1320 dpmatrix[j][k] = (double **) Malloc( 20 * sizeof(double *));
1321 for (l = 0; l < 20; l++)
1322 dpmatrix[j][k][l] = (double *) Malloc( 20 * sizeof(double));
1326 /* Allocate memory for one ddpmatrix, the second derivative matrix */
1327 ddpmatrix = (double ****) Malloc( rcategs * sizeof(double ***));
1328 for (j = 0; j < rcategs; j++) {
1329 ddpmatrix[j] = (double ***) Malloc( categs * sizeof(double **));
1330 for (k = 0; k < categs; k++) {
1331 ddpmatrix[j][k] = (double **) Malloc( 20 * sizeof(double *));
1332 for (l = 0; l < 20; l++)
1333 ddpmatrix[j][k][l] = (double *) Malloc( 20 * sizeof(double));
1337 /* Allocate memory and assign values to tbl, the matrix of possible rates*/
1339 tbl = (double **) Malloc( rcategs * sizeof(double *));
1340 for (j = 0; j < rcategs; j++)
1341 tbl[j] = (double *) Malloc( categs * sizeof(double));
1343 for (j = 0; j < rcategs; j++)
1344 for (k = 0; k < categs; k++)
1345 tbl[j][k] = rrate[j]*rate[k];
1348 for (i = 0; i < endsite; i++) {
1349 for (j = 0; j < rcategs; j++)
1350 sumrates += aliasweight[i] * probcat[j]
1351 * tbl[j][category[alias[i] - 1] - 1];
1353 sumrates /= (double)sites;
1354 for (j = 0; j < rcategs; j++)
1355 for (k = 0; k < categs; k++) {
1356 tbl[j][k] /= sumrates;
1362 if (gama || invar) {
1363 fprintf(outfile, "\nDiscrete approximation to gamma distributed rates\n");
1365 " Coefficient of variation of rates = %f (alpha = %f)\n", cv, alpha);
1368 fprintf(outfile, "\nState in HMM Rate of change Probability\n\n");
1369 for (i = 0; i < rcategs; i++)
1370 if (probcat[i] < 0.0001)
1371 fprintf(outfile, "%9ld%16.3f%20.6f\n", i+1, rrate[i], probcat[i]);
1372 else if (probcat[i] < 0.001)
1373 fprintf(outfile, "%9ld%16.3f%19.5f\n", i+1, rrate[i], probcat[i]);
1374 else if (probcat[i] < 0.01)
1375 fprintf(outfile, "%9ld%16.3f%18.4f\n", i+1, rrate[i], probcat[i]);
1377 fprintf(outfile, "%9ld%16.3f%17.3f\n", i+1, rrate[i], probcat[i]);
1378 putc('\n', outfile);
1381 "Expected length of a patch of sites having the same rate = %8.3f\n",
1383 putc('\n', outfile);
1387 fprintf(outfile, "\nSite category Rate of change\n\n");
1388 for (k = 0; k < categs; k++)
1389 fprintf(outfile, "%9ld%16.3f\n", k+1, rate[k]);
1390 fprintf(outfile, "\n\n");
1392 } /* prot_inittable */
1394 void free_pmatrix(long sib)
1398 for (j = 0; j < rcategs; j++) {
1399 for (k = 0; k < categs; k++) {
1400 for (l = 0; l < 20; l++)
1401 free(pmatrices[sib][j][k][l]);
1402 free(pmatrices[sib][j][k]);
1404 free(pmatrices[sib][j]);
1406 free(pmatrices[sib]);
1409 void alloc_pmatrix(long sib)
1411 /* Allocate memory for a new pmatrix. Called iff num_sibs>max_num_sibs */
1413 double ****temp_matrix;
1415 temp_matrix = (double ****) Malloc (rcategs * sizeof(double ***));
1416 for (j = 0; j < rcategs; j++) {
1417 temp_matrix[j] = (double ***) Malloc(categs * sizeof(double **));
1418 for (k = 0; k < categs; k++) {
1419 temp_matrix[j][k] = (double **) Malloc(20 * sizeof (double *));
1420 for (l = 0; l < 20; l++)
1421 temp_matrix[j][k][l] = (double *) Malloc(20 * sizeof(double));
1424 pmatrices[sib] = temp_matrix;
1426 } /* alloc_pmatrix */
1429 void make_pmatrix(double **matrix, double **dmat, double **ddmat,
1430 long derivative, double lz, double rat,
1431 double *eigmat, double **probmat)
1433 /* Computes the R matrix such that matrix[m][l] is the joint probability */
1435 /* Computes a P matrix such that matrix[m][l] is the conditional */
1436 /* probability of m given l. This is accomplished by dividing all terms */
1437 /* in the R matrix by freqaa[m], the frequency of l. */
1439 long k, l, m; /* (l) original character state */
1440 /* (m) final character state */
1441 /* (k) lambda counter */
1442 double p0, p1, p2, q;
1443 double elambdat[20], delambdat[20], ddelambdat[20];
1444 /* exponential term for matrix */
1445 /* and both derivative matrices */
1447 for (k = 0; k <= 19; k++) {
1448 elambdat[k] = exp(lz * rat * eigmat[k]);
1449 if(derivative != 0) {
1450 delambdat[k] = (elambdat[k] * rat * eigmat[k]);
1451 ddelambdat[k] = (delambdat[k] * rat * eigmat[k]);
1454 for (m = 0; m <= 19; m++) {
1455 for (l = 0; l <= 19; l++) {
1459 for (k = 0; k <= 19; k++) {
1460 q = probmat[k][m] * probmat[k][l];
1461 p0 += (q * elambdat[k]);
1462 if(derivative !=0) {
1463 p1 += (q * delambdat[k]);
1464 p2 += (q * ddelambdat[k]);
1467 matrix[m][l] = p0 / freqaa[m];
1468 if(derivative != 0) {
1469 dmat[m][l] = p1 / freqaa[m];
1470 ddmat[m][l] = p2 / freqaa[m];
1474 } /* make_pmatrix */
1477 void prot_nuview(node *p)
1479 long b, i, j, k, l, m, num_sibs, sib_index;
1480 node *sib_ptr, *sib_back_ptr;
1481 psitelike prot_xx, x2;
1484 double maxx,correction;
1486 /* Figure out how many siblings the current node has */
1487 /* and be sure that pmatrices is large enough */
1488 num_sibs = count_sibs(p);
1489 for (i = 0; i < num_sibs; i++)
1490 if (pmatrices[i] == NULL)
1493 /* Recursive calls, should be called for all children */
1495 for (i=0 ; i < num_sibs; i++) {
1496 sib_ptr = sib_ptr->next;
1497 sib_back_ptr = sib_ptr->back;
1499 if (!(sib_back_ptr == NULL))
1500 if (!sib_back_ptr->tip && !sib_back_ptr->initialized)
1501 prot_nuview(sib_back_ptr);
1504 /* Make pmatrices for all possible combinations of category, rcateg */
1506 sib_ptr = p; /* return to p */
1507 for (sib_index=0; sib_index < num_sibs; sib_index++) {
1508 sib_ptr = sib_ptr->next;
1509 sib_back_ptr = sib_ptr->back;
1511 if (sib_back_ptr != NULL)
1512 lw = fabs(p->tyme - sib_back_ptr->tyme);
1516 for (j = 0; j < rcategs; j++)
1517 for (k = 0; k < categs; k++)
1518 make_pmatrix(pmatrices[sib_index][j][k], NULL, NULL, 0, lw,
1519 tbl[j][k], eigmat, probmat);
1522 for (i = 0; i < endsite; i++) {
1525 k = category[alias[i]-1] - 1;
1526 for (j = 0; j < rcategs; j++) {
1528 /* initialize to 1 all values of prot_xx */
1529 for (m = 0; m <= 19; m++)
1532 sib_ptr = p; /* return to p */
1533 /* loop through all sibs and calculate likelihoods for all possible*/
1534 /* amino acid combinations */
1535 for (sib_index=0; sib_index < num_sibs; sib_index++) {
1536 sib_ptr = sib_ptr->next;
1537 sib_back_ptr = sib_ptr->back;
1540 if (sib_back_ptr != NULL) {
1541 memcpy(x2, sib_back_ptr->protx[i][j], sizeof(psitelike));
1543 correction += sib_back_ptr->underflows[i];
1547 for (b = 0; b <= 19; b++)
1549 pmat = pmatrices[sib_index][j][k];
1550 for (m = 0; m <= 19; m++) {
1552 for (l = 0; l <= 19; l++)
1553 prod7 += (pmat[m][l] * x2[l]);
1554 prot_xx[m] *= prod7;
1555 if ( prot_xx[m] > maxx && sib_index == (num_sibs - 1 ))
1559 /* And the final point of this whole function: */
1560 memcpy(p->protx[i][j], prot_xx, sizeof(psitelike));
1562 p->underflows[i] = 0;
1563 if ( maxx < MIN_DOUBLE )
1564 fix_protx(p,i,maxx,rcategs);
1565 p->underflows[i] += correction;
1568 p->initialized = true;
1572 void getthree(node *p, double thigh, double tlow)
1574 /* compute likelihood at a new triple of points */
1576 double tt = p->tyme;
1577 double td = fabs(tdelta);
1583 if ( x[0] < tlow + epsilon ) {
1584 x[0] = tlow + epsilon;
1585 x[1] = ( x[0] + x[2] ) / 2;
1588 if ( x[2] > thigh - epsilon ) {
1589 x[2] = thigh - epsilon;
1590 x[1] = ( x[0] + x[2] ) / 2;
1593 for ( i = 0 ; i < 3 ; i++ ) {
1596 lnl[i] = prot_evaluate(p);
1600 void makenewv(node *p)
1602 /* improve a node time */
1603 long it, imin, imax, i, num_sibs;
1604 double tt, tfactor, tlow, thigh, oldlike, ymin, ymax, s32, s21, yold;
1605 boolean done, already;
1606 node *s, *sdown, *sib_ptr, *sib_back_ptr;
1608 s = curtree.nodep[p->index - 1];
1610 if (s == curtree.root)
1616 num_sibs = count_sibs(p);
1618 thigh = s->next->back->tyme;
1619 for (i=0 ; i < num_sibs; i++) {
1620 sib_ptr = sib_ptr->next;
1621 sib_back_ptr = sib_ptr->back;
1622 if (sib_back_ptr->tyme < thigh)
1623 thigh = sib_back_ptr->tyme;
1625 done = (thigh - tlow < 4.0*epsilon);
1627 if (s != curtree.root)
1628 tdelta = (thigh - tlow) / 10.0;
1630 tdelta = (thigh - s->tyme) / 5.0;
1633 getthree(s, thigh, tlow);
1634 while (it < iterations && !done) {
1637 for (i = 2; i <= 3; i++) {
1638 if (lnl[i - 1] > ymax) {
1648 lnl[1] = lnl[imax - 1];
1649 lnl[imax - 1] = ymax;
1654 s32 = (lnl[2] - lnl[1]) / (x[2] - x[1]);
1655 s21 = (lnl[1] - lnl[0]) / (x[1] - x[0]);
1656 if (fabs(x[2] - x[0]) > epsilon)
1657 curv = (s32 - s21) / ((x[2] - x[0]) / 2);
1660 slope = (s32 + s21) / 2 - curv * (x[2] - 2 * x[1] + x[0]) / 4;
1663 tdelta = -fabs(tdelta);
1665 tdelta = fabs(tdelta);
1667 tdelta = -(tfactor * slope / curv);
1668 if (tt + tdelta <= tlow + epsilon)
1669 tdelta = tlow + epsilon - tt;
1670 if (tt + tdelta >= thigh - epsilon)
1671 tdelta = thigh - epsilon - tt;
1673 done = (fabs(yold - tt) < epsilon || fabs(tdelta) < epsilon);
1676 lnlike = prot_evaluate(s);
1679 for (i = 2; i <= 3; i++) {
1680 if (lnl[i - 1] < ymin) {
1685 already = (tt == x[0]) || (tt == x[1]) || (tt == x[2]);
1686 if (!already && ymin < lnlike) {
1688 lnl[imin - 1] = lnlike;
1690 if (already || lnlike < oldlike) {
1694 curtree.likelihood = oldlike;
1701 num_sibs = count_sibs(p);
1703 for (i=0 ; i < num_sibs; i++) {
1704 sib_ptr = sib_ptr->next;
1710 for (i=0 ; i < num_sibs; i++) {
1711 sib_ptr = sib_ptr->next;
1712 prot_nuview(sib_ptr);
1719 for (i=0 ; i < num_sibs; i++) {
1720 sib_ptr = sib_ptr->next;
1723 smoothed = smoothed && done;
1727 void update(node *p)
1729 node *sib_ptr, *sib_back_ptr;
1732 /* improve time and recompute views at a node */
1735 if (p->back != NULL) {
1736 if (!p->back->tip && !p->back->initialized)
1737 prot_nuview(p->back);
1741 num_sibs = count_sibs(p);
1742 for (i=0 ; i < num_sibs; i++) {
1743 sib_ptr = sib_ptr->next;
1744 sib_back_ptr = sib_ptr->back;
1745 if (sib_back_ptr != NULL) {
1746 if (!sib_back_ptr->tip && !sib_back_ptr->initialized)
1747 prot_nuview(sib_back_ptr);
1751 if ((!usertree) || (usertree && !lngths) || p->iter) {
1758 num_sibs = count_sibs(p);
1759 for (i=0 ; i < num_sibs; i++) {
1760 sib_ptr = sib_ptr->next;
1761 prot_nuview(sib_ptr);
1766 void smooth(node *p)
1780 num_sibs = count_sibs(p);
1781 for (i=0; i < num_sibs; i++) {
1782 sib_ptr = sib_ptr->next;
1783 if (polishing || (smoothit && !smoothed)) {
1784 smooth(sib_ptr->back);
1785 p->initialized = false;
1786 sib_ptr->initialized = false;
1793 void promlk_add(node *below, node *newtip, node *newfork, boolean tempadd)
1795 /* inserts the nodes newfork and its descendant, newtip, into the tree. */
1800 below = curtree.nodep[below->index - 1];
1801 newfork = curtree.nodep[newfork->index-1];
1802 newtip = curtree.nodep[newtip->index-1];
1803 if (below->back != NULL)
1804 below->back->back = newfork;
1805 newfork->back = below->back;
1806 below->back = newfork->next->next;
1807 newfork->next->next->back = below;
1808 newfork->next->back = newtip;
1809 newtip->back = newfork->next;
1810 if (newtip->tyme < below->tyme)
1813 newfork->tyme = p->tyme;
1814 if (curtree.root == below)
1815 curtree.root = newfork;
1816 if (newfork->back != NULL) {
1817 if (p->tyme > newfork->back->tyme)
1818 newfork->tyme = (p->tyme + newfork->back->tyme) / 2.0;
1819 else newfork->tyme = p->tyme - epsilon;
1820 newfork->next->tyme = newfork->tyme;
1821 newfork->next->next->tyme = newfork->tyme;
1823 p = curtree.nodep[p->back->index - 1];
1824 done = (p == curtree.root);
1826 done = (curtree.nodep[p->back->index - 1]->tyme < p->tyme - epsilon);
1828 curtree.nodep[p->back->index - 1]->tyme = p->tyme - epsilon;
1829 curtree.nodep[p->back->index - 1]->next->tyme = p->tyme - epsilon;
1830 curtree.nodep[p->back->index - 1]->next->next->tyme = p->tyme - epsilon;
1834 newfork->tyme = newfork->tyme - 2*epsilon;
1835 newfork->next->tyme = newfork->tyme;
1836 newfork->next->next->tyme = newfork->tyme;
1839 inittrav(newtip->back);
1842 while (i < smoothings && !smoothed) {
1845 smooth(newfork->back);
1851 void promlk_re_move(node **item, node **fork, boolean tempadd)
1853 /* removes nodes item and its ancestor, fork, from the tree.
1854 the new descendant of fork's ancestor is made to be
1855 fork's second descendant (other than item). Also
1856 returns pointers to the deleted nodes, item and fork */
1860 if ((*item)->back == NULL) {
1864 *item = curtree.nodep[(*item)->index-1];
1865 *fork = curtree.nodep[(*item)->back->index - 1];
1866 if (curtree.root == *fork) {
1867 if (*item == (*fork)->next->back)
1868 curtree.root = (*fork)->next->next->back;
1870 curtree.root = (*fork)->next->back;
1872 p = (*item)->back->next->back;
1873 q = (*item)->back->next->next->back;
1878 (*fork)->back = NULL;
1880 while (p != *fork) {
1884 (*item)->back = NULL;
1890 while (i <= smoothings) {
1896 } /* promlk_re_move */
1899 double prot_evaluate(node *p)
1902 static contribarr like, nulike, clai;
1903 double sum, sum2, sumc=0, y, prod4, prodl, frexm, sumterm, lterm;
1905 long i, j, k, l, m, lai;
1911 if (p == curtree.root && (count_sibs(p) == 2)) {
1913 q = p->next->next->back;
1914 y = r->tyme + q->tyme - 2 * p->tyme;
1915 if (!r->tip && !r->initialized) prot_nuview (r);
1916 if (!q->tip && !q->initialized) prot_nuview (q);
1917 } else if (p == curtree.root) {
1918 /* the next two lines copy tyme and x to p->next. Normally they are
1919 not initialized for an internal node. */
1920 /* assumes bifurcation */
1921 p->next->tyme = p->tyme;
1922 prot_nuview(p->next);
1925 y = fabs(p->next->tyme - q->tyme);
1929 if (!r->tip && !r->initialized) prot_nuview (r);
1930 if (!q->tip && !q->initialized) prot_nuview (q);
1931 y = fabs(r->tyme - q->tyme);
1934 for (j = 0; j < rcategs; j++)
1935 for (k = 0; k < categs; k++)
1936 make_pmatrix(pmatrices[0][j][k],NULL,NULL,0,y,tbl[j][k],eigmat,probmat);
1937 for (i = 0; i < endsite; i++) {
1938 k = category[alias[i]-1] - 1;
1939 for (j = 0; j < rcategs; j++) {
1940 memcpy(x1, r->protx[i][j], sizeof(psitelike));
1941 memcpy(x2, q->protx[i][j], sizeof(psitelike));
1943 pmat = pmatrices[0][j][k];
1944 for (m = 0; m <= 19; m++) {
1946 for (l = 0; l <= 19; l++)
1947 prodl += (pmat[m][l] * x2[l]);
1948 frexm = x1[m] * freqaa[m];
1949 prod4 += (prodl * frexm);
1954 for (j = 0; j < rcategs; j++)
1955 sumterm += probcat[j] * tterm[j];
1957 sumterm = 0.00000001; /* ??? */
1958 lterm = log(sumterm) + p->underflows[i] + q->underflows[i];
1959 for (j = 0; j < rcategs; j++)
1960 clai[j] = tterm[j] / sumterm;
1961 memcpy(contribution[i], clai, rcategs * sizeof(double));
1962 if (!auto_ && usertree && (which <= shimotrees))
1963 l0gf[which - 1][i] = lterm;
1964 sum += aliasweight[i] * lterm;
1967 for (j = 0; j < rcategs; j++)
1969 for (i = 0; i < sites; i++) {
1971 for (k = 0; k < rcategs; k++)
1972 sumc += probcat[k] * like[k];
1974 if ((ally[i] > 0) && (location[ally[i]-1] > 0)) {
1975 lai = location[ally[i] - 1];
1976 memcpy(clai, contribution[lai - 1], rcategs*sizeof(double));
1977 for (j = 0; j < rcategs; j++)
1978 nulike[j] = ((1.0 - lambda) * like[j] + sumc) * clai[j];
1980 for (j = 0; j < rcategs; j++)
1981 nulike[j] = ((1.0 - lambda) * like[j] + sumc);
1983 memcpy(like, nulike, rcategs * sizeof(double));
1986 for (i = 0; i < rcategs; i++)
1987 sum2 += probcat[i] * like[i];
1990 curtree.likelihood = sum;
1991 if (auto_ || !usertree)
1993 if(which <= shimotrees)
1994 l0gl[which - 1] = sum;
2000 if (sum > maxlogl) {
2005 } /* prot_evaluate */
2008 void tryadd(node *p, node **item, node **nufork)
2009 { /* temporarily adds one fork and one tip to the tree.
2010 if the location where they are added yields greater
2011 likelihood than other locations tested up to that
2012 time, then keeps that location as there */
2015 grcategs = (categs > rcategs) ? categs : rcategs;
2017 promlk_add(p, *item, *nufork, true);
2018 like = prot_evaluate(p);
2020 if (like >= bestyet || bestyet == UNDEFINED)
2021 prot_copy_(&curtree, &bestree, nonodes, grcategs);
2023 if (like > bestyet || bestyet == UNDEFINED) {
2027 promlk_re_move(item, nufork, true);
2031 void addpreorder(node *p, node *item_, node *nufork_, boolean contin,
2032 boolean continagain)
2034 /* traverses a binary tree, calling function tryadd
2035 at a node before calling tryadd at its descendants */
2036 node *item, *nufork;
2042 tryadd(p, &item, &nufork);
2043 contin = continagain;
2044 if ((!p->tip) && contin) {
2045 addpreorder(p->next->back, item, nufork, contin, continagain);
2046 addpreorder(p->next->next->back, item, nufork, contin, continagain);
2051 void restoradd(node *below, node *newtip, node *newfork, double prevtyme)
2053 /* restore "new" tip and fork to place "below". restore tymes */
2054 /* assumes bifurcation */
2055 hookup(newfork, below->back);
2056 hookup(newfork->next, below);
2057 hookup(newtip, newfork->next->next);
2058 curtree.nodep[newfork->index-1] = newfork;
2059 newfork->tyme = prevtyme;
2060 /* assumes bifurcations */
2061 newfork->next->tyme = prevtyme;
2062 newfork->next->next->tyme = prevtyme;
2066 void tryrearr(node *p, boolean *success)
2068 /* evaluates one rearrangement of the tree.
2069 if the new tree has greater likelihood than the old
2070 one sets success = TRUE and keeps the new tree.
2071 otherwise, restores the old tree */
2072 node *frombelow, *whereto, *forknode;
2073 double oldlike, prevtyme;
2076 if (p == curtree.root)
2078 forknode = curtree.nodep[p->back->index - 1];
2079 if (forknode == curtree.root)
2082 prevtyme = forknode->tyme;
2083 /* the following statement presumes bifurcating tree */
2084 if (forknode->next->back == p) {
2085 frombelow = forknode->next->next->back;
2089 frombelow = forknode->next->back;
2092 whereto = curtree.nodep[forknode->back->index - 1];
2093 promlk_re_move(&p, &forknode, true);
2094 promlk_add(whereto, p, forknode, true);
2095 like = prot_evaluate(p);
2096 if (like <= oldlike && oldlike != UNDEFINED) {
2097 promlk_re_move(&p, &forknode, true);
2098 restoradd(frombelow, p, forknode, prevtyme);
2099 if (wasonleft && (forknode->next->next->back == p)) {
2100 hookup (forknode->next->back, forknode->next->next);
2101 hookup (forknode->next, p);
2103 curtree.likelihood = oldlike;
2105 inittrav(forknode->next);
2106 inittrav(forknode->next->next);
2114 void repreorder(node *p, boolean *success)
2116 /* traverses a binary tree, calling function tryrearr
2117 at a node before calling tryrearr at its descendants */
2120 tryrearr(p, success);
2124 repreorder(p->next->back, success);
2126 repreorder(p->next->next->back, success);
2130 void rearrange(node **r)
2132 /* traverses the tree (preorder), finding any local
2133 rearrangement which increases the likelihood.
2134 if traversal succeeds in increasing the tree's
2135 likelihood, function rearrange runs traversal again */
2140 repreorder(*r, &success);
2145 void nodeinit(node *p)
2147 /* set up times at one node */
2148 node *sib_ptr, *sib_back_ptr;
2153 num_sibs = count_sibs(p);
2155 /* lowertyme = lowest of children's times */
2156 lowertyme = p->next->back->tyme;
2157 for (i=0 ; i < num_sibs; i++) {
2158 sib_ptr = sib_ptr->next;
2159 sib_back_ptr = sib_ptr->back;
2160 if (sib_back_ptr->tyme < lowertyme)
2161 lowertyme = sib_back_ptr->tyme;
2164 p->tyme = lowertyme - 0.1;
2167 for (i=0 ; i < num_sibs; i++) {
2168 sib_ptr = sib_ptr->next;
2169 sib_back_ptr = sib_ptr->back;
2171 sib_ptr->tyme = p->tyme;
2172 sib_back_ptr->v = sib_back_ptr->tyme - p->tyme;
2173 sib_ptr->v = sib_back_ptr->v;
2178 void initrav(node *p)
2182 node *sib_ptr, *sib_back_ptr;
2184 /* traverse to set up times throughout tree */
2189 num_sibs = count_sibs(p);
2190 for (i=0 ; i < num_sibs; i++) {
2191 sib_ptr = sib_ptr->next;
2192 sib_back_ptr = sib_ptr->back;
2193 initrav(sib_back_ptr);
2200 void travinit(node *p)
2203 node *sib_ptr, *sib_back_ptr;
2205 /* traverse to set up initial values */
2215 num_sibs = count_sibs(p);
2216 for (i=0 ; i < num_sibs; i++) {
2217 sib_ptr = sib_ptr->next;
2218 sib_back_ptr = sib_ptr->back;
2219 travinit(sib_back_ptr);
2223 p->initialized = true;
2227 void travsp(node *p)
2230 node *sib_ptr, *sib_back_ptr;
2232 /* traverse to find tips */
2233 if (p == curtree.root)
2239 num_sibs = count_sibs(p);
2240 for (i=0 ; i < num_sibs; i++) {
2241 sib_ptr = sib_ptr->next;
2242 sib_back_ptr = sib_ptr->back;
2243 travsp(sib_back_ptr);
2251 /* evaluate likelihood of tree, after iterating branch lengths */
2252 long i, j, num_sibs;
2257 for (i = 0; i < spp; i++)
2258 curtree.nodep[i]->initialized = false;
2259 for (i = spp; i < nonodes; i++) {
2260 sib_ptr = curtree.nodep[i];
2261 sib_ptr->initialized = false;
2262 num_sibs = count_sibs(sib_ptr);
2263 for (j=0 ; j < num_sibs; j++) {
2264 sib_ptr = sib_ptr->next;
2265 sib_ptr->initialized = false;
2270 initrav(curtree.root);
2271 travsp(curtree.root);
2272 for (i = 1; i <= smoothings * 4; i++)
2273 smooth(curtree.root);
2274 prot_evaluate(curtree.root);
2278 void promlk_coordinates(node *p, long *tipy)
2280 /* establishes coordinates of nodes */
2281 node *q, *first, *last, *pp1 =NULL, *pp2 =NULL;
2282 long num_sibs, p1, p2, i;
2286 p->ycoord = (*tipy);
2294 promlk_coordinates(q->back, tipy);
2297 num_sibs = count_sibs(p);
2298 p1 = (long)((num_sibs+1)/2.0);
2299 p2 = (long)((num_sibs+2)/2.0);
2304 if (i == p1) pp1 = q->back;
2305 if (i == p2) pp2 = q->back;
2310 p->xcoord = (long)(0.5 - over * p->tyme);
2311 p->ycoord = (pp1->ycoord + pp2->ycoord) / 2;
2312 p->ymin = first->ymin;
2313 p->ymax = last->ymax;
2314 } /* promlk_coordinates */
2317 void promlk_drawline(long i, double scale)
2319 /* draws one row of the tree diagram by moving up tree */
2320 node *p, *q, *r, *first =NULL, *last =NULL;
2322 boolean extra, done;
2327 if ((long)(p->ycoord) == i) {
2328 if (p->index - spp >= 10)
2329 fprintf(outfile, "-%2ld", p->index - spp);
2331 fprintf(outfile, "--%ld", p->index - spp);
2334 fprintf(outfile, " ");
2340 if (i >= r->back->ymin && i <= r->back->ymax) {
2345 } while (!(done || r == p));
2346 first = p->next->back;
2348 while (r->next != p)
2353 n = (long)(scale * ((long)(p->xcoord) - (long)(q->xcoord)) + 0.5);
2354 if (n < 3 && !q->tip)
2360 if ((long)(q->ycoord) == i && !done) {
2361 if (p->ycoord != q->ycoord)
2366 for (j = 1; j <= n - 2; j++)
2368 if (q->index - spp >= 10)
2369 fprintf(outfile, "%2ld", q->index - spp);
2371 fprintf(outfile, "-%ld", q->index - spp);
2374 for (j = 1; j < n; j++)
2377 } else if (!p->tip) {
2378 if ((long)(last->ycoord) > i && (long)(first->ycoord) < i &&
2379 i != (long)(p->ycoord)) {
2381 for (j = 1; j < n; j++)
2384 for (j = 1; j <= n; j++)
2388 for (j = 1; j <= n; j++)
2394 if ((long)(p->ycoord) == i && p->tip) {
2395 for (j = 0; j < nmlngth; j++)
2396 putc(nayme[p->index - 1][j], outfile);
2398 putc('\n', outfile);
2399 } /* promlk_drawline */
2402 void promlk_printree()
2404 /* prints out diagram of the tree */
2412 putc('\n', outfile);
2414 promlk_coordinates(curtree.root, &tipy);
2418 scale = 1.0 / (long)(p->tyme - curtree.root->tyme + 1.000);
2419 putc('\n', outfile);
2420 for (i = 1; i <= tipy - down; i++)
2421 promlk_drawline(i, scale);
2422 putc('\n', outfile);
2423 } /* promlk_printree */
2426 void describe(node *p)
2429 node *sib_ptr, *sib_back_ptr;
2432 if (p == curtree.root)
2433 fprintf(outfile, " root ");
2435 fprintf(outfile, "%4ld ", p->back->index - spp);
2437 for (i = 0; i < nmlngth; i++)
2438 putc(nayme[p->index - 1][i], outfile);
2440 fprintf(outfile, "%4ld ", p->index - spp);
2441 if (p != curtree.root) {
2442 fprintf(outfile, "%11.5f", (p->tyme - curtree.root->tyme));
2443 v = (p->tyme - curtree.nodep[p->back->index - 1]->tyme);
2444 fprintf(outfile, "%13.5f", v);
2446 putc('\n', outfile);
2450 num_sibs = count_sibs(p);
2451 for (i=0 ; i < num_sibs; i++) {
2452 sib_ptr = sib_ptr->next;
2453 sib_back_ptr = sib_ptr->back;
2454 describe(sib_back_ptr);
2460 void prot_reconstr(node *p, long n)
2462 /* reconstruct and print out acid at site n+1 at node p */
2463 long i, j, k, first, num_sibs = 0;
2464 double f, sum, xx[20];
2468 putc(y[p->index-1][n], outfile);
2470 num_sibs = count_sibs(p);
2471 if ((ally[n] == 0) || (location[ally[n]-1] == 0))
2474 j = location[ally[n]-1] - 1;
2476 for (i = 0; i <= 19; i++) {
2477 f = p->protx[j][mx-1][i];
2480 for (k = 0; k < num_sibs; k++) {
2482 f *= q->protx[j][mx-1][i];
2486 xx[i] = f * freqaa[i];
2489 for (i = 0; i <= 19; i++)
2492 for (i = 0; i <= 19; i++)
2493 if (xx[i] > xx[first])
2495 if (xx[first] > 0.95)
2496 putc(aachar[first], outfile);
2498 putc(tolower(aachar[first]), outfile);
2499 if (rctgry && rcategs > 1)
2505 } /* prot_reconstr */
2508 void rectrav(node *p, long m, long n)
2510 /* print out segment of reconstructed sequence for one branch */
2516 for (i = 0; i < nmlngth; i++)
2517 putc(nayme[p->index-1][i], outfile);
2519 fprintf(outfile, "%4ld ", p->index - spp);
2520 fprintf(outfile, " ");
2522 for (i = m; i <= n; i++) {
2523 if ((i % 10 == 0) && (i != m))
2525 prot_reconstr(p, i);
2527 putc('\n', outfile);
2529 num_sibs = count_sibs(p);
2531 for (i = 0; i < num_sibs; i++) {
2532 sib_ptr = sib_ptr->next;
2533 rectrav(sib_ptr->back, m, n);
2544 double like[maxcategs], nulike[maxcategs];
2547 mp = (long **)Malloc(sites * sizeof(long *));
2548 for (i = 0; i <= sites-1; ++i)
2549 mp[i] = (long *)Malloc(sizeof(long)*rcategs);
2550 fprintf(outfile, "\nLn Likelihood = %11.5f\n\n", curtree.likelihood);
2551 fprintf(outfile, " Ancestor Node Node Height Length\n");
2552 fprintf(outfile, " -------- ---- ---- ------ ------\n");
2553 describe(curtree.root);
2554 putc('\n', outfile);
2555 if (rctgry && rcategs > 1) {
2556 for (i = 0; i < rcategs; i++)
2558 for (i = sites - 1; i >= 0; i--) {
2560 for (j = 0; j < rcategs; j++) {
2561 nulike[j] = (lambda1 + lambda * probcat[j]) * like[j];
2563 for (k = 1; k <= rcategs; k++) {
2565 if (lambda * probcat[k - 1] * like[k - 1] > nulike[j]) {
2566 nulike[j] = lambda * probcat[k - 1] * like[k - 1];
2571 if ((ally[i] > 0) && (location[ally[i]-1] > 0))
2572 nulike[j] *= contribution[location[ally[i] - 1] - 1][j];
2575 for (j = 0; j < rcategs; j++)
2577 memcpy(like, nulike, rcategs * sizeof(double));
2581 for (i = 1; i <= rcategs; i++) {
2582 if (probcat[i - 1] * like[i - 1] > mode) {
2584 mode = probcat[i - 1] * like[i - 1];
2589 "Combination of categories that contributes the most to the likelihood:\n\n");
2590 for (i = 1; i <= nmlngth + 3; i++)
2592 for (i = 1; i <= sites; i++) {
2593 fprintf(outfile, "%ld", mx);
2596 if (i % 60 == 0 && i != sites) {
2597 putc('\n', outfile);
2598 for (j = 1; j <= nmlngth + 3; j++)
2601 mx = mp[i - 1][mx - 1];
2603 fprintf(outfile, "\n\n");
2604 marginal = (double **) Malloc( sites*sizeof(double *));
2605 for (i = 0; i < sites; i++)
2606 marginal[i] = (double *) Malloc( rcategs*sizeof(double));
2607 for (i = 0; i < rcategs; i++)
2609 for (i = sites - 1; i >= 0; i--) {
2611 for (j = 0; j < rcategs; j++) {
2612 nulike[j] = (lambda1 + lambda * probcat[j]) * like[j];
2613 for (k = 1; k <= rcategs; k++) {
2615 nulike[j] += lambda * probcat[k - 1] * like[k - 1];
2617 if ((ally[i] > 0) && (location[ally[i]-1] > 0))
2618 nulike[j] *= contribution[location[ally[i] - 1] - 1][j];
2621 for (j = 0; j < rcategs; j++) {
2623 marginal[i][j] = nulike[j];
2625 memcpy(like, nulike, rcategs * sizeof(double));
2627 for (i = 0; i < rcategs; i++)
2629 for (i = 0; i < sites; i++) {
2631 for (j = 0; j < rcategs; j++) {
2632 nulike[j] = (lambda1 + lambda * probcat[j]) * like[j];
2633 for (k = 1; k <= rcategs; k++) {
2635 nulike[j] += lambda * probcat[k - 1] * like[k - 1];
2637 marginal[i][j] *= like[j] * probcat[j];
2640 for (j = 0; j < rcategs; j++)
2642 memcpy(like, nulike, rcategs * sizeof(double));
2644 for (j = 0; j < rcategs; j++)
2645 sum += marginal[i][j];
2646 for (j = 0; j < rcategs; j++)
2647 marginal[i][j] /= sum;
2649 fprintf(outfile, "Most probable category at each site if > 0.95");
2650 fprintf(outfile, " probability (\".\" otherwise)\n\n");
2651 for (i = 1; i <= nmlngth + 3; i++)
2653 for (i = 0; i < sites; i++) {
2655 for (j = 0; j < rcategs; j++)
2656 if (marginal[i][j] > sum) {
2657 sum = marginal[i][j];
2661 fprintf(outfile, "%ld", mm+1);
2664 if ((i+1) % 60 == 0) {
2666 putc('\n', outfile);
2667 for (j = 1; j <= nmlngth + 3; j++)
2671 else if ((i+1) % 10 == 0)
2674 putc('\n', outfile);
2675 for (i = 0; i < sites; i++)
2679 putc('\n', outfile);
2680 putc('\n', outfile);
2681 putc('\n', outfile);
2683 fprintf(outfile, "Probable sequences at interior nodes:\n\n");
2684 fprintf(outfile, " node ");
2685 for (i = 0; (i < 13) && (i < ((sites + (sites-1)/10 - 39) / 2)); i++)
2687 fprintf(outfile, "Reconstructed sequence (caps if > 0.95)\n\n");
2688 if (!rctgry || (rcategs == 1))
2690 for (i = 0; i < sites; i += 60) {
2694 rectrav(curtree.root, i, k);
2695 putc('\n', outfile);
2699 for (i = 0; i <= sites-1; ++i)
2705 void promlk_treeout(node *p)
2707 /* write out file with representation of final tree */
2709 long i, n, w, num_sibs;
2715 for (i = 1; i <= nmlngth; i++) {
2716 if (nayme[p->index - 1][i - 1] != ' ')
2719 for (i = 0; i < n; i++) {
2720 c = nayme[p->index - 1][i];
2728 num_sibs = count_sibs(p);
2732 for (i=0; i < (num_sibs - 1); i++) {
2733 sib_ptr = sib_ptr->next;
2734 promlk_treeout(sib_ptr->back);
2738 putc('\n', outtree);
2742 sib_ptr = sib_ptr->next;
2743 promlk_treeout(sib_ptr->back);
2747 if (p == curtree.root) {
2748 fprintf(outtree, ";\n");
2751 x = (p->tyme - curtree.nodep[p->back->index - 1]->tyme);
2753 w = (long)(0.4342944822 * log(x));
2757 w = (long)(0.4342944822 * log(-x)) + 1;
2760 fprintf(outtree, ":%*.5f", (int)(w + 7), x);
2762 } /* promlk_treeout */
2765 void initpromlnode(node **p, node **grbg, node *q, long len, long nodei,
2766 long *ntips, long *parens, initops whichinit,
2767 pointarray treenode, pointarray nodep, Char *str,
2768 Char *ch, FILE *intree)
2770 /* initializes a node */
2772 double valyew, divisor;
2774 switch (whichinit) {
2777 (*p)->index = nodei;
2779 malloc_ppheno((*p), endsite, rcategs);
2780 nodep[(*p)->index - 1] = (*p);
2784 malloc_ppheno(*p, endsite, rcategs);
2785 (*p)->index = nodei;
2788 match_names_to_data(str, nodep, p, spp);
2791 (*p)->initialized = false;
2794 if ((*p)->back != NULL)
2795 (*p)->back->iter = true;
2798 processlength(&valyew, &divisor, ch, &minusread, intree, parens);
2799 (*p)->v = valyew / divisor;
2801 if ((*p)->back != NULL) {
2802 (*p)->back->v = (*p)->v;
2803 (*p)->back->iter = false;
2807 curtree.nodep[spp]->iter = false;
2809 default: /* cases hslength, hsnolength, treewt */
2810 break; /* should never occur */
2812 } /* initpromlnode */
2815 void tymetrav(node *p, double *x)
2817 /* set up times of nodes */
2825 num_sibs = count_sibs(p);
2826 for (i=0; i < num_sibs; i++) {
2827 sib_ptr = sib_ptr->next;
2828 tymetrav(sib_ptr->back, x);
2837 while (q->next != p) {
2842 (*x) = p->tyme - p->v;
2846 void free_all_protx (long nonodes, pointarray treenode)
2852 /* Zero thru spp are tips, */
2853 for (i = 0; i < spp; i++) {
2854 for (j = 0; j < endsite; j++)
2855 free(treenode[i]->protx[j]);
2856 free(treenode[i]->protx);
2859 /* The rest are rings (i.e. triads) */
2860 for (i = spp; i < nonodes; i++) {
2861 if (treenode[i] != NULL) {
2863 for (j = 1; j <= 3; j++) {
2864 for (k = 0; k < endsite; k++)
2871 } /* free_all_protx */
2876 /* constructs a binary tree from the pointers in curtree.nodep,
2877 adds each node at location which yields highest likelihood
2878 then rearranges the tree for greatest likelihood */
2882 double bestlike, gotlike, x;
2883 node *item, *nufork, *dummy, *q, *root=NULL;
2884 boolean dummy_haslengths, dummy_first, goteof;
2887 pointarray dummy_treenode=NULL;
2889 grcategs = (categs > rcategs) ? categs : rcategs;
2894 for (i = 1; i <= spp; i++)
2895 enterorder[i - 1] = i;
2897 randumize(seed, enterorder);
2898 curtree.root = curtree.nodep[spp];
2899 curtree.root->back = NULL;
2900 for (i = 0; i < spp; i++)
2901 curtree.nodep[i]->back = NULL;
2902 for (i = spp; i < nonodes; i++) {
2903 q = curtree.nodep[i];
2905 while ((q = q->next) != curtree.nodep[i])
2909 promlk_add(curtree.nodep[enterorder[0]-1], curtree.nodep[enterorder[1]-1],
2910 curtree.nodep[spp], false);
2912 printf("\nAdding species:\n");
2913 writename(0, 2, enterorder);
2915 phyFillScreenColor();
2920 for (i = 3; i <= spp; i++) {
2921 bestyet = UNDEFINED;
2922 bestree.likelihood = bestyet;
2923 there = curtree.root;
2924 item = curtree.nodep[enterorder[i - 1] - 1];
2925 nufork = curtree.nodep[spp + i - 2];
2926 lastsp = (i == spp);
2927 addpreorder(curtree.root, item, nufork, true, true);
2928 promlk_add(there, item, nufork, false);
2929 like = prot_evaluate(curtree.root);
2930 rearrange(&curtree.root);
2931 if (curtree.likelihood > bestree.likelihood) {
2932 prot_copy_(&curtree, &bestree, nonodes, grcategs);
2935 writename(i - 1, 1, enterorder);
2937 phyFillScreenColor();
2940 if (lastsp && global) {
2942 printf("Doing global rearrangements\n");
2944 for (j = 1; j <= nonodes; j++)
2945 if ( j % (( nonodes / 72 ) + 1 ) == 0 )
2954 for (j = 0; j < nonodes; j++) {
2955 bestyet = UNDEFINED;
2956 item = curtree.nodep[j];
2957 if (item != curtree.root) {
2958 nufork = curtree.nodep[curtree.nodep[j]->back->index - 1];
2959 promlk_re_move(&item, &nufork, false);
2960 there = curtree.root;
2961 addpreorder(curtree.root, item, nufork, true, true);
2962 promlk_add(there, item, nufork, false);
2965 if ( j % (( nonodes / 72 ) + 1 ) == 0 )
2972 } while (bestlike < gotlike);
2975 if (njumble > 1 && lastsp) {
2976 for (i = 0; i < spp; i++ )
2977 promlk_re_move(&curtree.nodep[i], &dummy, false);
2978 if (jumb == 1 || bestree2.likelihood < bestree.likelihood)
2979 prot_copy_(&bestree, &bestree2, nonodes, grcategs);
2981 if (jumb == njumble) {
2983 prot_copy_(&bestree2, &curtree, nonodes, grcategs);
2985 prot_copy_(&bestree, &curtree, nonodes, grcategs);
2986 fprintf(outfile, "\n\n");
2988 curtree.likelihood = prot_evaluate(curtree.root);
2993 promlk_treeout(curtree.root);
2997 openfile(&intree, INTREE, "input tree file", "r", progname, intreename);
2998 numtrees = countsemic(&intree);
2999 if(numtrees > MAXSHIMOTREES)
3000 shimotrees = MAXSHIMOTREES;
3002 shimotrees = numtrees;
3004 initseed(&inseed, &inseed0, seed);
3005 l0gl = (double *) Malloc(shimotrees * sizeof(double));
3006 l0gf = (double **) Malloc(shimotrees * sizeof(double *));
3007 for (i=0; i < shimotrees; ++i)
3008 l0gf[i] = (double *)Malloc(endsite * sizeof(double));
3010 fprintf(outfile, "User-defined tree");
3013 fprintf(outfile, ":\n\n");
3015 fprintf(outfile, "\n\n");
3017 while (which <= numtrees) {
3019 /* These initializations required each time through the loop
3020 since multiple trees require re-initialization */
3021 dummy_haslengths = true;
3026 treeread(intree, &root, dummy_treenode, &goteof, &dummy_first,
3027 curtree.nodep, &nextnode, &dummy_haslengths, &grbg,
3028 initpromlnode,false,nonodes);
3032 root = curtree.nodep[root->index - 1];
3033 curtree.root = root;
3036 tymetrav(curtree.root, &x);
3038 if (goteof && (which <= numtrees)) {
3039 /* if we hit the end of the file prematurely */
3041 printf ("ERROR: trees missing at end of file.\n");
3042 printf ("\tExpected number of trees:\t\t%ld\n", numtrees);
3043 printf ("\tNumber of trees actually in file:\t%ld.\n\n", which - 1);
3046 curtree.start = curtree.nodep[0]->back;
3052 promlk_treeout(curtree.root);
3054 if(which < numtrees){
3055 prot_freex_notip(nonodes, curtree.nodep);
3056 gdispose(curtree.root, &grbg, curtree.nodep);
3062 if (!auto_ && numtrees > 1 && weightsum > 1 )
3063 standev2(numtrees, maxwhich, 0, endsite, maxlogl, l0gl, l0gf,
3068 for (i=0; i < shimotrees; i++)
3076 free_all_protx(nonodes2, curtree.nodep);
3078 free_all_protx(nonodes2, bestree.nodep);
3080 free_all_protx(nonodes2, bestree2.nodep);
3083 printf("\n\nOutput written to file \"%s\"\n\n", outfilename);
3085 printf("Tree also written onto file \"%s\"\n", outtreename);
3095 /* Free and/or close stuff */
3101 /* Seems to require freeing every time... */
3102 for (i = 0; i < spp; i++) {
3116 if (! (njumble <= 1))
3117 freetree2(bestree2.nodep, nonodes2);
3122 fixmacfile(outfilename);
3123 fixmacfile(outtreename);
3128 int main(int argc, Char *argv[])
3129 { /* Protein Maximum Likelihood with molecular clock */
3132 argc = 1; /* macsetup("Promlk", ""); */
3137 openfile(&infile, INFILE, "input file", "r", argv[0], infilename);
3138 openfile(&outfile, OUTFILE, "output file", "w", argv[0], outfilename);
3148 openfile(&outtree,OUTTREE,"output tree file","w",argv[0],outtreename);
3150 openfile(&catfile,CATFILE,"categories file","r",argv[0],catfilename);
3151 if (weights || justwts)
3152 openfile(&weightfile,WEIGHTFILE,"weights file","r",argv[0],weightfilename);
3153 for (ith = 1; ith <= datasets; ith++) {
3155 fprintf(outfile, "Data set # %ld:\n\n", ith);
3157 printf("\nData set # %ld:\n", ith);
3163 for (jumb = 1; jumb <= njumble; jumb++){
3170 printf("Done.\n\n");
3172 phyRestoreConsoleAttributes();
3175 } /* Protein Maximum Likelihood with molecular clock */