/*********************************************** * # Copyright 2009-2010. Liu Yongchao * # Contact: Liu Yongchao, School of Computer Engineering, * # Nanyang Technological University. * # Emails: liuy0039@ntu.edu.sg; nkcslyc@hotmail.com * # * # GPL version 3.0 applies. * # * ************************************************/ #include "MSAGuideTree.h" #include "MSA.h" MSAGuideTree::MSAGuideTree(MSA* msa, VVF& distances, int numSeqs) { int i; TreeNode* node; //system configuration this->msa = msa; this->distMatrix = &distances; this->numSeqs = numSeqs; this->seqsWeights = msa->getSeqsWeights(); //tree structure this->nodesSize = this->numSeqs * 2 + 1; this->nodes = new TreeNode[this->nodesSize]; if (!this->nodes) { cerr << "TreeNodes memory allocation failed" << endl; exit(-1); } //initialize all the tree nodes this->leafs = this->nodes; this->leafsNum = this->numSeqs; this->nodesNum = 2 * this->leafsNum - 1; for (i = 0; i < this->nodesSize; i++) { node = &nodes[i]; node->left = 0; node->right = 0; node->parent = 0; node->leftIdx = -1; node->rightIdx = -1; node->parentIdx = -1; node->idx = -1; node->dist = 0; node->leaf = NODE; //setted to be NODE, by default node->order = 0; node->depth = 0; } //initialize the leaf nodes for (i = 0; i < this->leafsNum; i++) { node = &this->leafs[i]; node->idx = i; node->leaf = LEAF; } } MSAGuideTree::~MSAGuideTree() { //release tree nodes delete[] this->nodes; //release alignment orders releaseAlignmentOrders(); } //get the tree nodes TreeNode* MSAGuideTree::getNodes() { return nodes; } //get the leaf nodes TreeNode* MSAGuideTree::getLeafs() { return leafs; } //get the number of nodes; int MSAGuideTree::getNodesNum() { return nodesNum; } //get the number of leaf nodes int MSAGuideTree::getLeafsNum() { return leafsNum; } //get the alignment orders AlignmentOrder* MSAGuideTree::getAlignOrders() { return alignOrders; } int MSAGuideTree::getAlignOrdersNum() { return alignOrdersNum; } /**************************************************** create the evolutionary relationship ****************************************************/ void MSAGuideTree::connectNodes(TreeNode* parent, int parentIdx, TreeNode* leftChild, float leftDist, TreeNode* rightChild, float rightDist) { //save the parents index for each child leftChild->parent = parent; leftChild->parentIdx = parentIdx; rightChild->parent = parent; rightChild->parentIdx = parentIdx; //save the branch lengths (i.e. distance) from each child to its parent leftChild->dist = leftDist; rightChild->dist = rightDist; //save the indices of itself and its children for this new tree node parent->idx = parentIdx; parent->left = leftChild; parent->leftIdx = leftChild->idx; parent->right = rightChild; parent->rightIdx = rightChild->idx; } /***************************************** compute the alignment order of the phylogentic tree *****************************************/ void MSAGuideTree::createAlignmentOrders() { int i; AlignmentOrder* order; //allocate memory space for alignment orders vector this->alignOrdersNum = 0;//for alignment orders, it starts from 1 instead of 0 this->alignOrdersSize = numSeqs;//the number of internal nodes of the phylogentic tree + 1 this->alignOrders = new AlignmentOrder[this->alignOrdersSize]; if (!this->alignOrders) { cerr << "OOPS: Alignment orders memory allocation failed" << endl; exit(-1); } //initialize the alignment orders vector for (i = 0; i < this->alignOrdersSize; i++) { order = &this->alignOrders[i]; order->leftOrder = 0; order->rightOrder = 0; order->leftLeafs = 0; order->leftNum = 0; order->rightLeafs = 0; order->rightNum = 0; } //starting out constructing the alignment orders int subLeafsNum; int nodeDepth = 1; int subOrder = recursiveCreateAlignmentOrders(this->root, 0, subLeafsNum, nodeDepth); //check whether the function works well if (subLeafsNum != numSeqs || this->alignOrdersNum != subOrder) { fprintf(stderr, "The alignment orders constructed were wrong (subLeafsNum %d, alignOrdersNum %d, subOrder %d)\n", subLeafsNum, alignOrdersNum, subOrder); } } int MSAGuideTree::recursiveCreateAlignmentOrders(TreeNode* subRoot, int* subLeafs, int& subLeafsNum, int nodeDepth) { int leftNum, rightNum; int leftOrder, rightOrder; int* leftLeafs, *rightLeafs; if (subRoot->leaf == LEAF) { subLeafs[0] = subRoot->idx; subLeafsNum = 1; return 0; //if it is a leaf, return the index 0 } leftOrder = rightOrder = 0; leftNum = rightNum = 0; leftLeafs = new int[numSeqs]; rightLeafs = new int[numSeqs]; //check the left subtree if (subRoot->left) { //recursively tranverse the left subtree leftOrder = recursiveCreateAlignmentOrders(subRoot->left, leftLeafs, leftNum, nodeDepth + 1); } //check the right subtree if (subRoot->right) { rightOrder = recursiveCreateAlignmentOrders(subRoot->right, rightLeafs, rightNum, nodeDepth + 1); } //save the leafs in the left and right subtrees of the current subtree if (this->alignOrdersNum > this->alignOrdersSize) { fprintf(stderr, "the alignment order function works bad\n");\ exit(-1); } AlignmentOrder* order = &this->alignOrders[++this->alignOrdersNum]; order->nodeDepth = nodeDepth; order->leftOrder = leftOrder; order->rightOrder = rightOrder; order->leftNum = leftNum; order->rightNum = rightNum; order->leftLeafs = new int[order->leftNum]; order->rightLeafs = new int[order->rightNum]; if (!order->leftLeafs || !order->rightLeafs) { fprintf(stderr, "memory allocation failed while recursively constructing alignment orders\n"); exit(-1); } memcpy(order->leftLeafs, leftLeafs, order->leftNum * sizeof(int)); memcpy(order->rightLeafs, rightLeafs, order->rightNum * sizeof(int)); delete[] leftLeafs; delete[] rightLeafs; //for the root of the tree, subLeafs buffer is set to 0 if (subLeafs) { //copy the results to the parent tree node memcpy(subLeafs, order->leftLeafs, order->leftNum * sizeof(int)); memcpy(subLeafs + order->leftNum, order->rightLeafs, order->rightNum * sizeof(int)); } //compute the total number of leafs in this subtree subLeafsNum = order->leftNum + order->rightNum; return this->alignOrdersNum;//return the index of itself, starting from 1, instead of 0 } void MSAGuideTree::releaseAlignmentOrders() { if (!this->alignOrders) { return; } for (int i = 0; i < this->alignOrdersNum; i++) { AlignmentOrder* order = &this->alignOrders[i]; if (order->leftLeafs) { delete[] order->leftLeafs; } if (order->rightLeafs) { delete[] order->rightLeafs; } } delete[] alignOrders; } /******************************** display the alignment orders ********************************/ void MSAGuideTree::displayAlignmentOrders() { int i, j; AlignmentOrder* order; fprintf(stderr, "************DISPLAY ALIGNMENT ORDER***************\n"); for (i = 1; i <= this->alignOrdersNum; i++) { order = &this->alignOrders[i]; fprintf(stderr, "GROUP (%d depth %d):\n---LEFT ORDER: %d\n", i, order->nodeDepth, order->leftOrder); fprintf(stderr, "---LEFT: "); for (j = 0; j < order->leftNum; j++) { fprintf(stderr, "%d ", order->leftLeafs[j]); } fprintf(stderr, "\n---RIGHT ORDER: %d\n", order->rightOrder); fprintf(stderr, "\n---RIGHT: "); for (j = 0; j < order->rightNum; j++) { fprintf(stderr, "%d ", order->rightLeafs[j]); } fprintf(stderr, "\n"); } fprintf(stderr, "*******************************************\n"); } /********************************* display the tree *********************************/ void MSAGuideTree::displayTree() { fprintf(stderr, "**************DISPLAY TREE*********************\n"); for (int i = 0; i < nodesNum; i++) { TreeNode* node = &nodes[i]; fprintf(stderr, "%d(%p): left(%p) %d, right(%p) %d, parent(%p) %d, dist %f\n", (node == &nodes[node->idx]) ? node->idx : -2, node, node->left, (!node->left || node->left == &nodes[node->leftIdx]) ? node->leftIdx : -2, node->right, (!node->right || node->right == &nodes[node->rightIdx]) ? node->rightIdx : -2, node->parent, (!node->parent || node->parent == &nodes[node->parentIdx]) ? node->parentIdx : -2, node->dist); } fprintf(stderr, "*******************************************\n"); } /********************************* compute the sequence weights *********************************/ void MSAGuideTree::getSeqsWeights() { int i; TreeNode* curr; //compute the order of each node, which represents the number of leaf nodes in the substree rooting from it. for (i = 0; i < leafsNum; i++) { //for each leaf nodes curr = &this->leafs[i]; while (curr != 0) { curr->order++; curr = curr->parent; } } //compute the weight of each sequence, which corresponds to a leaf node for (i = 0; i < numSeqs; i++) { //compute the weight of each sequence float weights = 0; curr = &this->leafs[i]; while (curr->parent != 0) { weights += curr->dist / curr->order; curr = curr->parent; //printf("order:%d weights: %f\n", curr->order, weights); } //save the weight of this sequence seqsWeights[i] = (int) (100 * weights); //printf("%d\n", seqsWeights[i]); } //normalize the weights int wsum = 0; for (i = 0; i < numSeqs; i++) { wsum += seqsWeights[i]; } if (wsum == 0) { //in this case, every sequence is assumed to have an identical weight for (i = 0; i < numSeqs; i++) { seqsWeights[i] = 1; } wsum = numSeqs; } //printf("wsum:%d \n", wsum); for (i = 0; i < numSeqs; i++) { seqsWeights[i] = (seqsWeights[i] * INT_MULTIPLY) / wsum; if (seqsWeights[i] < 1) { seqsWeights[i] = 1; } //printf("%d \n", seqsWeights[i]); } } void MSAGuideTree::create() { //do nothing }