Next version of JABA
[jabaws.git] / binaries / src / muscle / clwwt.cpp
1 #include "muscle.h"\r
2 #include "tree.h"\r
3 #include "msa.h"\r
4 \r
5 /***\r
6 Compute weights by the CLUSTALW method.\r
7 Thompson, Higgins and Gibson (1994), CABIOS (10) 19-29;\r
8 see also CLUSTALW paper.\r
9 \r
10 Weights are computed from the edge lengths of a rooted tree.\r
11 \r
12 Define the strength of an edge to be its length divided by the number\r
13 of leaves under that edge. The weight of a sequence is then the sum\r
14 of edge strengths on the path from the root to the leaf.\r
15 \r
16 Example.\r
17 \r
18         0.2\r
19        -----A     0.1\r
20          -x         ------- B     0.7\r
21            --------y           ----------- C\r
22             0.3     ----------z\r
23                     0.4    -------------- D\r
24                                  0.8\r
25 \r
26 Edge    Length  Leaves  Strength\r
27 ----    -----   ------  --------\r
28 xy              0.3             3               0.1\r
29 xA              0.2             1               0.2\r
30 yz              0.4             2               0.2\r
31 yB              0.1             1               0.1\r
32 zC              0.7             1               0.7\r
33 zD              0.8             1               0.8\r
34 \r
35 Leaf    Path            Strengths                       Weight\r
36 ----    ----            ---------                       ------\r
37 A               xA                      0.2                                     0.2\r
38 B               xy-yB           0.1 + 0.1                       0.2\r
39 C               xy-yz-zC        0.1 + 0.2 + 0.7         1.0\r
40 D               xy-yz-zD        0.1 + 0.2 + 0.8         1.1\r
41 \r
42 ***/\r
43 \r
44 #define TRACE 0\r
45 \r
46 static unsigned CountLeaves(const Tree &tree, unsigned uNodeIndex,\r
47   unsigned LeavesUnderNode[])\r
48         {\r
49         if (tree.IsLeaf(uNodeIndex))\r
50                 {\r
51                 LeavesUnderNode[uNodeIndex] = 1;\r
52                 return 1;\r
53                 }\r
54 \r
55         const unsigned uLeft = tree.GetLeft(uNodeIndex);\r
56         const unsigned uRight = tree.GetRight(uNodeIndex);\r
57         const unsigned uRightCount = CountLeaves(tree, uRight, LeavesUnderNode);\r
58         const unsigned uLeftCount = CountLeaves(tree, uLeft, LeavesUnderNode);\r
59         const unsigned uCount = uRightCount + uLeftCount;\r
60         LeavesUnderNode[uNodeIndex] = uCount;\r
61         return uCount;\r
62         }\r
63 \r
64 void CalcClustalWWeights(const Tree &tree, WEIGHT Weights[])\r
65         {\r
66 #if     TRACE\r
67         Log("CalcClustalWWeights\n");\r
68         tree.LogMe();\r
69 #endif\r
70 \r
71         const unsigned uLeafCount = tree.GetLeafCount();\r
72         if (0 == uLeafCount)\r
73                 return;\r
74         else if (1 == uLeafCount)\r
75                 {\r
76                 Weights[0] = (WEIGHT) 1.0;\r
77                 return;\r
78                 }\r
79         else if (2 == uLeafCount)\r
80                 {\r
81                 Weights[0] = (WEIGHT) 0.5;\r
82                 Weights[1] = (WEIGHT) 0.5;\r
83                 return;\r
84                 }\r
85 \r
86         if (!tree.IsRooted())\r
87                 Quit("CalcClustalWWeights requires rooted tree");\r
88 \r
89         const unsigned uNodeCount = tree.GetNodeCount();\r
90         unsigned *LeavesUnderNode = new unsigned[uNodeCount];\r
91         memset(LeavesUnderNode, 0, uNodeCount*sizeof(unsigned));\r
92 \r
93         const unsigned uRootNodeIndex = tree.GetRootNodeIndex();\r
94         unsigned uLeavesUnderRoot = CountLeaves(tree, uRootNodeIndex, LeavesUnderNode);\r
95         if (uLeavesUnderRoot != uLeafCount)\r
96                 Quit("WeightsFromTreee: Internal error, root count %u %u",\r
97                   uLeavesUnderRoot, uLeafCount);\r
98 \r
99 #if     TRACE\r
100         Log("Node  Leaves    Length  Strength\n");\r
101         Log("----  ------  --------  --------\n");\r
102         //    1234  123456  12345678  12345678\r
103 #endif\r
104 \r
105         double *Strengths = new double[uNodeCount];\r
106         for (unsigned uNodeIndex = 0; uNodeIndex < uNodeCount; ++uNodeIndex)\r
107                 {\r
108                 if (tree.IsRoot(uNodeIndex))\r
109                         {\r
110                         Strengths[uNodeIndex] = 0.0;\r
111                         continue;\r
112                         }\r
113                 const unsigned uParent = tree.GetParent(uNodeIndex);\r
114                 const double dLength = tree.GetEdgeLength(uNodeIndex, uParent);\r
115                 const unsigned uLeaves = LeavesUnderNode[uNodeIndex];\r
116                 const double dStrength = dLength / (double) uLeaves;\r
117                 Strengths[uNodeIndex] = dStrength;\r
118 #if     TRACE\r
119                 Log("%4u  %6u  %8g  %8g\n", uNodeIndex, uLeaves, dLength, dStrength);\r
120 #endif\r
121                 }\r
122 \r
123 #if     TRACE\r
124         Log("\n");\r
125         Log("                 Seq  Path..Weight\n");\r
126         Log("--------------------  ------------\n");\r
127 #endif\r
128         for (unsigned n = 0; n < uLeafCount; ++n)\r
129                 {\r
130                 const unsigned uLeafNodeIndex = tree.LeafIndexToNodeIndex(n);\r
131 #if     TRACE\r
132                 Log("%20.20s  %4u ", tree.GetLeafName(uLeafNodeIndex), uLeafNodeIndex);\r
133 #endif\r
134                 if (!tree.IsLeaf(uLeafNodeIndex))\r
135                         Quit("CalcClustalWWeights: leaf");\r
136 \r
137                 double dWeight = 0;\r
138                 unsigned uNode = uLeafNodeIndex;\r
139                 while (!tree.IsRoot(uNode))\r
140                         {\r
141                         dWeight += Strengths[uNode];\r
142                         uNode = tree.GetParent(uNode);\r
143 #if     TRACE\r
144                         Log("->%u(%g)", uNode, Strengths[uNode]);\r
145 #endif\r
146                         }\r
147                 if (dWeight < 0.0001)\r
148                         {\r
149 #if     TRACE\r
150                         Log("zero->one");\r
151 #endif\r
152                         dWeight = 1.0;\r
153                         }\r
154                 Weights[n] = (WEIGHT) dWeight;\r
155 #if     TRACE\r
156                 Log(" = %g\n", dWeight);\r
157 #endif\r
158                 }\r
159 \r
160         delete[] Strengths;\r
161         delete[] LeavesUnderNode;\r
162 \r
163         Normalize(Weights, uLeafCount);\r
164         }\r
165 \r
166 void MSA::SetClustalWWeights(const Tree &tree)\r
167         {\r
168         const unsigned uSeqCount = GetSeqCount();\r
169         const unsigned uLeafCount = tree.GetLeafCount();\r
170 \r
171         WEIGHT *Weights = new WEIGHT[uSeqCount];\r
172 \r
173         CalcClustalWWeights(tree, Weights);\r
174 \r
175         for (unsigned n = 0; n < uLeafCount; ++n)\r
176                 {\r
177                 const WEIGHT w = Weights[n];\r
178                 const unsigned uLeafNodeIndex = tree.LeafIndexToNodeIndex(n);\r
179                 const unsigned uId = tree.GetLeafId(uLeafNodeIndex);\r
180                 const unsigned uSeqIndex = GetSeqIndex(uId);\r
181 #if     DEBUG\r
182                 if (GetSeqName(uSeqIndex) != tree.GetLeafName(uLeafNodeIndex))\r
183                         Quit("MSA::SetClustalWWeights: names don't match");\r
184 #endif\r
185                 SetSeqWeight(uSeqIndex, w);\r
186                 }\r
187         NormalizeWeights((WEIGHT) 1.0);\r
188 \r
189         delete[] Weights;\r
190         }\r