1 /***********************************************
2 * # Copyright 2009-2010. Liu Yongchao
3 * # Contact: Liu Yongchao, School of Computer Engineering,
4 * # Nanyang Technological University.
5 * # Emails: liuy0039@ntu.edu.sg; nkcslyc@hotmail.com
7 * # GPL version 3.0 applies.
9 * ************************************************/
10 #include "MSAGuideTree.h"
12 MSAGuideTree::MSAGuideTree(MSA* msa, VVF& distances, int numSeqs) {
15 //system configuration
17 this->distMatrix = &distances;
18 this->numSeqs = numSeqs;
19 this->seqsWeights = msa->getSeqsWeights();
22 this->nodesSize = this->numSeqs * 2 + 1;
23 this->nodes = new TreeNode[this->nodesSize];
25 cerr << "TreeNodes memory allocation failed" << endl;
28 //initialize all the tree nodes
29 this->leafs = this->nodes;
30 this->leafsNum = this->numSeqs;
31 this->nodesNum = 2 * this->leafsNum - 1;
32 for (i = 0; i < this->nodesSize; i++) {
42 node->leaf = NODE; //setted to be NODE, by default
46 //initialize the leaf nodes
47 for (i = 0; i < this->leafsNum; i++) {
48 node = &this->leafs[i];
53 MSAGuideTree::~MSAGuideTree() {
57 //release alignment orders
58 releaseAlignmentOrders();
62 TreeNode* MSAGuideTree::getNodes() {
66 TreeNode* MSAGuideTree::getLeafs() {
69 //get the number of nodes;
70 int MSAGuideTree::getNodesNum() {
73 //get the number of leaf nodes
74 int MSAGuideTree::getLeafsNum() {
77 //get the alignment orders
78 AlignmentOrder* MSAGuideTree::getAlignOrders() {
81 int MSAGuideTree::getAlignOrdersNum() {
82 return alignOrdersNum;
84 /****************************************************
85 create the evolutionary relationship
86 ****************************************************/
87 void MSAGuideTree::connectNodes(TreeNode* parent, int parentIdx,
88 TreeNode* leftChild, float leftDist, TreeNode* rightChild,
90 //save the parents index for each child
91 leftChild->parent = parent;
92 leftChild->parentIdx = parentIdx;
93 rightChild->parent = parent;
94 rightChild->parentIdx = parentIdx;
96 //save the branch lengths (i.e. distance) from each child to its parent
97 leftChild->dist = leftDist;
98 rightChild->dist = rightDist;
100 //save the indices of itself and its children for this new tree node
101 parent->idx = parentIdx;
102 parent->left = leftChild;
103 parent->leftIdx = leftChild->idx;
104 parent->right = rightChild;
105 parent->rightIdx = rightChild->idx;
107 /*****************************************
108 compute the alignment order of the phylogentic tree
109 *****************************************/
110 void MSAGuideTree::createAlignmentOrders() {
113 AlignmentOrder* order;
114 //allocate memory space for alignment orders vector
115 this->alignOrdersNum = 0;//for alignment orders, it starts from 1 instead of 0
116 this->alignOrdersSize = numSeqs;//the number of internal nodes of the phylogentic tree + 1
117 this->alignOrders = new AlignmentOrder[this->alignOrdersSize];
118 if (!this->alignOrders) {
119 cerr << "OOPS: Alignment orders memory allocation failed" << endl;
122 //initialize the alignment orders vector
123 for (i = 0; i < this->alignOrdersSize; i++) {
124 order = &this->alignOrders[i];
125 order->leftOrder = 0;
126 order->rightOrder = 0;
127 order->leftLeafs = 0;
129 order->rightLeafs = 0;
132 //starting out constructing the alignment orders
135 int subOrder = recursiveCreateAlignmentOrders(this->root, 0, subLeafsNum,
138 //check whether the function works well
139 if (subLeafsNum != numSeqs || this->alignOrdersNum != subOrder) {
141 "The alignment orders constructed were wrong (subLeafsNum %d, alignOrdersNum %d, subOrder %d)\n",
142 subLeafsNum, alignOrdersNum, subOrder);
146 int MSAGuideTree::recursiveCreateAlignmentOrders(TreeNode* subRoot,
147 int* subLeafs, int& subLeafsNum, int nodeDepth) {
148 int leftNum, rightNum;
149 int leftOrder, rightOrder;
150 int* leftLeafs, *rightLeafs;
152 if (subRoot->leaf == LEAF) {
153 subLeafs[0] = subRoot->idx;
156 return 0; //if it is a leaf, return the index 0
158 leftOrder = rightOrder = 0;
159 leftNum = rightNum = 0;
160 leftLeafs = new int[numSeqs];
161 rightLeafs = new int[numSeqs];
163 //check the left subtree
165 //recursively tranverse the left subtree
166 leftOrder = recursiveCreateAlignmentOrders(subRoot->left, leftLeafs,
167 leftNum, nodeDepth + 1);
169 //check the right subtree
170 if (subRoot->right) {
171 rightOrder = recursiveCreateAlignmentOrders(subRoot->right, rightLeafs,
172 rightNum, nodeDepth + 1);
174 //save the leafs in the left and right subtrees of the current subtree
175 if (this->alignOrdersNum > this->alignOrdersSize) {
176 fprintf(stderr, "the alignment order function works bad\n");\
180 AlignmentOrder* order = &this->alignOrders[++this->alignOrdersNum];
181 order->nodeDepth = nodeDepth;
182 order->leftOrder = leftOrder;
183 order->rightOrder = rightOrder;
184 order->leftNum = leftNum;
185 order->rightNum = rightNum;
186 order->leftLeafs = new int[order->leftNum];
187 order->rightLeafs = new int[order->rightNum];
188 if (!order->leftLeafs || !order->rightLeafs) {
190 "memory allocation failed while recursively constructing alignment orders\n");
193 memcpy(order->leftLeafs, leftLeafs, order->leftNum * sizeof(int));
194 memcpy(order->rightLeafs, rightLeafs, order->rightNum * sizeof(int));
199 //for the root of the tree, subLeafs buffer is set to 0
201 //copy the results to the parent tree node
202 memcpy(subLeafs, order->leftLeafs, order->leftNum * sizeof(int));
203 memcpy(subLeafs + order->leftNum, order->rightLeafs,
204 order->rightNum * sizeof(int));
206 //compute the total number of leafs in this subtree
207 subLeafsNum = order->leftNum + order->rightNum;
209 return this->alignOrdersNum;//return the index of itself, starting from 1, instead of 0
211 void MSAGuideTree::releaseAlignmentOrders() {
212 if (!this->alignOrders) {
215 for (int i = 0; i < this->alignOrdersNum; i++) {
216 AlignmentOrder* order = &this->alignOrders[i];
217 if (order->leftLeafs) {
218 delete[] order->leftLeafs;
220 if (order->rightLeafs) {
221 delete[] order->rightLeafs;
224 delete[] alignOrders;
226 /********************************
227 display the alignment orders
228 ********************************/
229 void MSAGuideTree::displayAlignmentOrders() {
231 AlignmentOrder* order;
232 fprintf(stderr, "************DISPLAY ALIGNMENT ORDER***************\n");
233 for (i = 1; i <= this->alignOrdersNum; i++) {
234 order = &this->alignOrders[i];
236 fprintf(stderr, "GROUP (%d depth %d):\n---LEFT ORDER: %d\n", i,
237 order->nodeDepth, order->leftOrder);
238 fprintf(stderr, "---LEFT: ");
239 for (j = 0; j < order->leftNum; j++) {
240 fprintf(stderr, "%d ", order->leftLeafs[j]);
243 fprintf(stderr, "\n---RIGHT ORDER: %d\n", order->rightOrder);
244 fprintf(stderr, "\n---RIGHT: ");
245 for (j = 0; j < order->rightNum; j++) {
246 fprintf(stderr, "%d ", order->rightLeafs[j]);
248 fprintf(stderr, "\n");
250 fprintf(stderr, "*******************************************\n");
252 /*********************************
254 *********************************/
255 void MSAGuideTree::displayTree() {
256 fprintf(stderr, "**************DISPLAY TREE*********************\n");
257 for (int i = 0; i < nodesNum; i++) {
258 TreeNode* node = &nodes[i];
261 "%d(%p): left(%p) %d, right(%p) %d, parent(%p) %d, dist %f\n",
262 (node == &nodes[node->idx]) ? node->idx : -2, node, node->left,
263 (!node->left || node->left == &nodes[node->leftIdx]) ?
264 node->leftIdx : -2, node->right,
265 (!node->right || node->right == &nodes[node->rightIdx]) ?
266 node->rightIdx : -2, node->parent,
267 (!node->parent || node->parent == &nodes[node->parentIdx]) ?
268 node->parentIdx : -2, node->dist);
270 fprintf(stderr, "*******************************************\n");
272 /*********************************
273 compute the sequence weights
274 *********************************/
275 void MSAGuideTree::getSeqsWeights() {
279 //compute the order of each node, which represents the number of leaf nodes in the substree rooting from it.
280 for (i = 0; i < leafsNum; i++) {
281 //for each leaf nodes
282 curr = &this->leafs[i];
289 //compute the weight of each sequence, which corresponds to a leaf node
290 for (i = 0; i < numSeqs; i++) {
291 //compute the weight of each sequence
293 curr = &this->leafs[i];
294 while (curr->parent != 0) {
295 weights += curr->dist / curr->order;
297 //printf("order:%d weights: %f\n", curr->order, weights);
299 //save the weight of this sequence
300 seqsWeights[i] = (int) (100 * weights);
301 //printf("%d\n", seqsWeights[i]);
303 //normalize the weights
305 for (i = 0; i < numSeqs; i++) {
306 wsum += seqsWeights[i];
309 //in this case, every sequence is assumed to have an identical weight
310 for (i = 0; i < numSeqs; i++) {
315 //printf("wsum:%d \n", wsum);
316 for (i = 0; i < numSeqs; i++) {
317 seqsWeights[i] = (seqsWeights[i] * INT_MULTIPLY) / wsum;
318 if (seqsWeights[i] < 1) {
321 //printf("%d \n", seqsWeights[i]);
324 void MSAGuideTree::create() {