Add GLprobs and MSAprobs to binaries
[jabaws.git] / binaries / src / MSAProbs-0.9.7 / MSAProbs / MSAGuideTree.cpp
1 /***********************************************
2  * # Copyright 2009-2010. Liu Yongchao
3  * # Contact: Liu Yongchao, School of Computer Engineering,
4  * #                     Nanyang Technological University.
5  * # Emails:     liuy0039@ntu.edu.sg; nkcslyc@hotmail.com
6  * #
7  * # GPL version 3.0 applies.
8  * #
9  * ************************************************/
10 #include "MSAGuideTree.h"
11 #include "MSA.h"
12 MSAGuideTree::MSAGuideTree(MSA* msa, VVF& distances, int numSeqs) {
13         int i;
14         TreeNode* node;
15         //system configuration
16         this->msa = msa;
17         this->distMatrix = &distances;
18         this->numSeqs = numSeqs;
19         this->seqsWeights = msa->getSeqsWeights();
20
21         //tree structure
22         this->nodesSize = this->numSeqs * 2 + 1;
23         this->nodes = new TreeNode[this->nodesSize];
24         if (!this->nodes) {
25                 cerr << "TreeNodes memory allocation failed" << endl;
26                 exit(-1);
27         }
28         //initialize all the tree nodes
29         this->leafs = this->nodes;
30         this->leafsNum = this->numSeqs;
31         this->nodesNum = 2 * this->leafsNum - 1;
32         for (i = 0; i < this->nodesSize; i++) {
33                 node = &nodes[i];
34                 node->left = 0;
35                 node->right = 0;
36                 node->parent = 0;
37                 node->leftIdx = -1;
38                 node->rightIdx = -1;
39                 node->parentIdx = -1;
40                 node->idx = -1;
41                 node->dist = 0;
42                 node->leaf = NODE;              //setted to be NODE, by default
43                 node->order = 0;
44                 node->depth = 0;
45         }
46         //initialize the leaf nodes
47         for (i = 0; i < this->leafsNum; i++) {
48                 node = &this->leafs[i];
49                 node->idx = i;
50                 node->leaf = LEAF;
51         }
52 }
53 MSAGuideTree::~MSAGuideTree() {
54         //release tree nodes
55         delete[] this->nodes;
56
57         //release alignment orders
58         releaseAlignmentOrders();
59
60 }
61 //get the tree nodes
62 TreeNode* MSAGuideTree::getNodes() {
63         return nodes;
64 }
65 //get the leaf nodes
66 TreeNode* MSAGuideTree::getLeafs() {
67         return leafs;
68 }
69 //get the number of nodes;
70 int MSAGuideTree::getNodesNum() {
71         return nodesNum;
72 }
73 //get the number of leaf nodes
74 int MSAGuideTree::getLeafsNum() {
75         return leafsNum;
76 }
77 //get the alignment orders
78 AlignmentOrder* MSAGuideTree::getAlignOrders() {
79         return alignOrders;
80 }
81 int MSAGuideTree::getAlignOrdersNum() {
82         return alignOrdersNum;
83 }
84 /****************************************************
85  create the evolutionary relationship
86  ****************************************************/
87 void MSAGuideTree::connectNodes(TreeNode* parent, int parentIdx,
88                 TreeNode* leftChild, float leftDist, TreeNode* rightChild,
89                 float rightDist) {
90         //save the parents index for each child
91         leftChild->parent = parent;
92         leftChild->parentIdx = parentIdx;
93         rightChild->parent = parent;
94         rightChild->parentIdx = parentIdx;
95
96         //save the branch lengths (i.e. distance) from each child to its parent
97         leftChild->dist = leftDist;
98         rightChild->dist = rightDist;
99
100         //save the indices of itself and its children for this new tree node
101         parent->idx = parentIdx;
102         parent->left = leftChild;
103         parent->leftIdx = leftChild->idx;
104         parent->right = rightChild;
105         parent->rightIdx = rightChild->idx;
106 }
107 /*****************************************
108  compute the alignment order of the phylogentic tree
109  *****************************************/
110 void MSAGuideTree::createAlignmentOrders() {
111         int i;
112
113         AlignmentOrder* order;
114         //allocate memory space for alignment orders vector
115         this->alignOrdersNum = 0;//for alignment orders, it starts from 1 instead of 0
116         this->alignOrdersSize = numSeqs;//the number of internal nodes of the phylogentic tree + 1
117         this->alignOrders = new AlignmentOrder[this->alignOrdersSize];
118         if (!this->alignOrders) {
119                 cerr << "OOPS: Alignment orders memory allocation failed" << endl;
120                 exit(-1);
121         }
122         //initialize the alignment orders vector
123         for (i = 0; i < this->alignOrdersSize; i++) {
124                 order = &this->alignOrders[i];
125                 order->leftOrder = 0;
126                 order->rightOrder = 0;
127                 order->leftLeafs = 0;
128                 order->leftNum = 0;
129                 order->rightLeafs = 0;
130                 order->rightNum = 0;
131         }
132         //starting out constructing the alignment orders
133         int subLeafsNum;
134         int nodeDepth = 1;
135         int subOrder = recursiveCreateAlignmentOrders(this->root, 0, subLeafsNum,
136                         nodeDepth);
137
138         //check whether the function works well
139         if (subLeafsNum != numSeqs || this->alignOrdersNum != subOrder) {
140                 fprintf(stderr,
141                                 "The alignment orders constructed were wrong (subLeafsNum %d, alignOrdersNum %d, subOrder %d)\n",
142                                 subLeafsNum, alignOrdersNum, subOrder);
143         }
144
145 }
146 int MSAGuideTree::recursiveCreateAlignmentOrders(TreeNode* subRoot,
147                 int* subLeafs, int& subLeafsNum, int nodeDepth) {
148         int leftNum, rightNum;
149         int leftOrder, rightOrder;
150         int* leftLeafs, *rightLeafs;
151
152         if (subRoot->leaf == LEAF) {
153                 subLeafs[0] = subRoot->idx;
154                 subLeafsNum = 1;
155
156                 return 0;                       //if it is a leaf, return the index 0
157         }
158         leftOrder = rightOrder = 0;
159         leftNum = rightNum = 0;
160         leftLeafs = new int[numSeqs];
161         rightLeafs = new int[numSeqs];
162
163         //check the left subtree
164         if (subRoot->left) {
165                 //recursively tranverse the left subtree
166                 leftOrder = recursiveCreateAlignmentOrders(subRoot->left, leftLeafs,
167                                 leftNum, nodeDepth + 1);
168         }
169         //check the right subtree
170         if (subRoot->right) {
171                 rightOrder = recursiveCreateAlignmentOrders(subRoot->right, rightLeafs,
172                                 rightNum, nodeDepth + 1);
173         }
174         //save the leafs in the left and right subtrees of the current subtree
175         if (this->alignOrdersNum > this->alignOrdersSize) {
176                 fprintf(stderr, "the alignment order function works bad\n");\\r
177                 exit(-1);
178         }
179
180         AlignmentOrder* order = &this->alignOrders[++this->alignOrdersNum];
181         order->nodeDepth = nodeDepth;
182         order->leftOrder = leftOrder;
183         order->rightOrder = rightOrder;
184         order->leftNum = leftNum;
185         order->rightNum = rightNum;
186         order->leftLeafs = new int[order->leftNum];
187         order->rightLeafs = new int[order->rightNum];
188         if (!order->leftLeafs || !order->rightLeafs) {
189                 fprintf(stderr,
190                                 "memory allocation failed while recursively constructing alignment orders\n");
191                 exit(-1);
192         }
193         memcpy(order->leftLeafs, leftLeafs, order->leftNum * sizeof(int));
194         memcpy(order->rightLeafs, rightLeafs, order->rightNum * sizeof(int));
195
196         delete[] leftLeafs;
197         delete[] rightLeafs;
198
199         //for the root of the tree, subLeafs buffer is set to 0
200         if (subLeafs) {
201                 //copy the results to the parent tree node
202                 memcpy(subLeafs, order->leftLeafs, order->leftNum * sizeof(int));
203                 memcpy(subLeafs + order->leftNum, order->rightLeafs,
204                                 order->rightNum * sizeof(int));
205         }
206         //compute the total number of leafs in this subtree
207         subLeafsNum = order->leftNum + order->rightNum;
208
209         return this->alignOrdersNum;//return the index of itself, starting from 1, instead of 0
210 }
211 void MSAGuideTree::releaseAlignmentOrders() {
212         if (!this->alignOrders) {
213                 return;
214         }
215         for (int i = 0; i < this->alignOrdersNum; i++) {
216                 AlignmentOrder* order = &this->alignOrders[i];
217                 if (order->leftLeafs) {
218                         delete[] order->leftLeafs;
219                 }
220                 if (order->rightLeafs) {
221                         delete[] order->rightLeafs;
222                 }
223         }
224         delete[] alignOrders;
225 }
226 /********************************
227  display the alignment orders
228  ********************************/
229 void MSAGuideTree::displayAlignmentOrders() {
230         int i, j;
231         AlignmentOrder* order;
232         fprintf(stderr, "************DISPLAY ALIGNMENT ORDER***************\n");
233         for (i = 1; i <= this->alignOrdersNum; i++) {
234                 order = &this->alignOrders[i];
235
236                 fprintf(stderr, "GROUP (%d depth %d):\n---LEFT ORDER: %d\n", i,
237                                 order->nodeDepth, order->leftOrder);
238                 fprintf(stderr, "---LEFT: ");
239                 for (j = 0; j < order->leftNum; j++) {
240                         fprintf(stderr, "%d ", order->leftLeafs[j]);
241                 }
242
243                 fprintf(stderr, "\n---RIGHT ORDER: %d\n", order->rightOrder);
244                 fprintf(stderr, "\n---RIGHT: ");
245                 for (j = 0; j < order->rightNum; j++) {
246                         fprintf(stderr, "%d ", order->rightLeafs[j]);
247                 }
248                 fprintf(stderr, "\n");
249         }
250         fprintf(stderr, "*******************************************\n");
251 }
252 /*********************************
253  display the tree
254  *********************************/
255 void MSAGuideTree::displayTree() {
256         fprintf(stderr, "**************DISPLAY TREE*********************\n");
257         for (int i = 0; i < nodesNum; i++) {
258                 TreeNode* node = &nodes[i];
259
260                 fprintf(stderr,
261                                 "%d(%p): left(%p) %d, right(%p) %d, parent(%p) %d, dist %f\n",
262                                 (node == &nodes[node->idx]) ? node->idx : -2, node, node->left,
263                                 (!node->left || node->left == &nodes[node->leftIdx]) ?
264                                                 node->leftIdx : -2, node->right,
265                                 (!node->right || node->right == &nodes[node->rightIdx]) ?
266                                                 node->rightIdx : -2, node->parent,
267                                 (!node->parent || node->parent == &nodes[node->parentIdx]) ?
268                                                 node->parentIdx : -2, node->dist);
269         }
270         fprintf(stderr, "*******************************************\n");
271 }
272 /*********************************
273  compute the sequence weights
274  *********************************/
275 void MSAGuideTree::getSeqsWeights() {
276         int i;
277         TreeNode* curr;
278
279         //compute the order of each node, which represents the number of leaf nodes in the substree rooting from it.
280         for (i = 0; i < leafsNum; i++) {
281                 //for each leaf nodes
282                 curr = &this->leafs[i];
283                 while (curr != 0) {
284                         curr->order++;
285
286                         curr = curr->parent;
287                 }
288         }
289         //compute the weight of each sequence, which corresponds to a leaf node
290         for (i = 0; i < numSeqs; i++) {
291                 //compute the weight of each sequence
292                 float weights = 0;
293                 curr = &this->leafs[i];
294                 while (curr->parent != 0) {
295                         weights += curr->dist / curr->order;
296                         curr = curr->parent;
297                         //printf("order:%d weights: %f\n", curr->order, weights);
298                 }
299                 //save the weight of this sequence
300                 seqsWeights[i] = (int) (100 * weights);
301                 //printf("%d\n", seqsWeights[i]);
302         }
303         //normalize the weights 
304         int wsum = 0;
305         for (i = 0; i < numSeqs; i++) {
306                 wsum += seqsWeights[i];
307         }
308         if (wsum == 0) {
309                 //in this case, every sequence is assumed to have an identical weight
310                 for (i = 0; i < numSeqs; i++) {
311                         seqsWeights[i] = 1;
312                 }
313                 wsum = numSeqs;
314         }
315         //printf("wsum:%d \n", wsum);
316         for (i = 0; i < numSeqs; i++) {
317                 seqsWeights[i] = (seqsWeights[i] * INT_MULTIPLY) / wsum;
318                 if (seqsWeights[i] < 1) {
319                         seqsWeights[i] = 1;
320                 }
321                 //printf("%d \n", seqsWeights[i]);
322         }
323 }
324 void MSAGuideTree::create() {
325         //do nothing
326 }
327