5 // "Standard" NJ distance: the Kimura measure.
\r
6 // This is defined to be:
\r
8 // log_e(1 - p - p*p/5)
\r
10 // where p is the fraction of residues that differ, i.e.:
\r
12 // p = (1 - fractional_conservation)
\r
14 // This measure is infinite for p = 0.8541 and is considered
\r
15 // unreliable for p >= 0.75 (according to the ClustalW docs).
\r
16 // ClustalW uses a table lookup for values > 0.75.
\r
17 // The following table was copied from the ClustalW file dayhoff.h.
\r
19 static int dayhoff_pams[]={
\r
20 195, /* 75.0% observed d; 195 PAMs estimated = 195% estimated d */
\r
21 196, /* 75.1% observed d; 196 PAMs estimated */
\r
22 197, 198, 199, 200, 200, 201, 202, 203,
\r
23 204, 205, 206, 207, 208, 209, 209, 210, 211, 212,
\r
24 213, 214, 215, 216, 217, 218, 219, 220, 221, 222,
\r
25 223, 224, 226, 227, 228, 229, 230, 231, 232, 233,
\r
26 234, 236, 237, 238, 239, 240, 241, 243, 244, 245,
\r
27 246, 248, 249, 250, /* 250 PAMs = 80.3% observed d */
\r
28 252, 253, 254, 255, 257, 258,
\r
29 260, 261, 262, 264, 265, 267, 268, 270, 271, 273,
\r
30 274, 276, 277, 279, 281, 282, 284, 285, 287, 289,
\r
31 291, 292, 294, 296, 298, 299, 301, 303, 305, 307,
\r
32 309, 311, 313, 315, 317, 319, 321, 323, 325, 328,
\r
33 330, 332, 335, 337, 339, 342, 344, 347, 349, 352,
\r
34 354, 357, 360, 362, 365, 368, 371, 374, 377, 380,
\r
35 383, 386, 389, 393, 396, 399, 403, 407, 410, 414,
\r
36 418, 422, 426, 430, 434, 438, 442, 447, 451, 456,
\r
37 461, 466, 471, 476, 482, 487, 493, 498, 504, 511,
\r
38 517, 524, 531, 538, 545, 553, 560, 569, 577, 586,
\r
39 595, 605, 615, 626, 637, 649, 661, 675, 688, 703,
\r
40 719, 736, 754, 775, 796, 819, 845, 874, 907, 945,
\r
41 /* 92.9% observed; 945 PAMs */
\r
42 988 /* 93.0% observed; 988 PAMs */
\r
44 static int iTableEntries = sizeof(dayhoff_pams)/sizeof(dayhoff_pams[0]);
\r
46 double KimuraDist(double dPctId)
\r
48 double p = 1 - dPctId;
\r
49 // Typical case: use Kimura's empirical formula
\r
51 return -log(1 - p - (p*p)/5);
\r
53 // Per ClustalW, return 10.0 for anything over 93%
\r
57 // If p >= 0.75, use table lookup
\r
58 assert(p <= 1 && p >= 0.75);
\r
59 // Thanks for Michael Hoel for pointing out a bug
\r
60 // in the table index calculation in versions <= 3.52.
\r
61 int iTableIndex = (int) ((p - 0.75)*1000 + 0.5);
\r
62 if (iTableIndex < 0 || iTableIndex >= iTableEntries)
\r
63 Quit("Internal error in MSADistKimura::ComputeDist");
\r
65 return dayhoff_pams[iTableIndex] / 100.0;
\r
68 //double MSADistKimura::ComputeDist(const MSA &msa, unsigned uSeqIndex1,
\r
69 // unsigned uSeqIndex2)
\r
71 // double dPctId = msa.GetPctIdentityPair(uSeqIndex1, uSeqIndex2);
\r
72 // return KimuraDist(dPctId);
\r
75 double KimuraDistToPctId(double dKimuraDist)
\r
77 // Solve quadratic equation
\r
78 const double a = 0.2;
\r
80 const double c = 1.0 - exp(-dKimuraDist);
\r
81 const double p = (-b + sqrt(b*b + 4*a*c))/(2*a);
\r
85 double PctIdToHeightKimura(double dPctId)
\r
87 return KimuraDist(dPctId);
\r