Add GLprobs and MSAprobs to binaries
[jabaws.git] / binaries / src / GLProbs-1.0 / MSAGuideTree.cpp
diff --git a/binaries/src/GLProbs-1.0/MSAGuideTree.cpp b/binaries/src/GLProbs-1.0/MSAGuideTree.cpp
new file mode 100644 (file)
index 0000000..ec9a5e8
--- /dev/null
@@ -0,0 +1,327 @@
+/***********************************************
+ * # Copyright 2009-2010. Liu Yongchao
+ * # Contact: Liu Yongchao, School of Computer Engineering,
+ * #                    Nanyang Technological University.
+ * # Emails:    liuy0039@ntu.edu.sg; nkcslyc@hotmail.com
+ * #
+ * # GPL version 3.0 applies.
+ * #
+ * ************************************************/
+#include "MSAGuideTree.h"
+#include "MSA.h"
+MSAGuideTree::MSAGuideTree(MSA* msa, VVF& distances, int numSeqs) {
+       int i;
+       TreeNode* node;
+       //system configuration
+       this->msa = msa;
+       this->distMatrix = &distances;
+       this->numSeqs = numSeqs;
+       this->seqsWeights = msa->getSeqsWeights();
+
+       //tree structure
+       this->nodesSize = this->numSeqs * 2 + 1;
+       this->nodes = new TreeNode[this->nodesSize];
+       if (!this->nodes) {
+               cerr << "TreeNodes memory allocation failed" << endl;
+               exit(-1);
+       }
+       //initialize all the tree nodes
+       this->leafs = this->nodes;
+       this->leafsNum = this->numSeqs;
+       this->nodesNum = 2 * this->leafsNum - 1;
+       for (i = 0; i < this->nodesSize; i++) {
+               node = &nodes[i];
+               node->left = 0;
+               node->right = 0;
+               node->parent = 0;
+               node->leftIdx = -1;
+               node->rightIdx = -1;
+               node->parentIdx = -1;
+               node->idx = -1;
+               node->dist = 0;
+               node->leaf = NODE;              //setted to be NODE, by default
+               node->order = 0;
+               node->depth = 0;
+       }
+       //initialize the leaf nodes
+       for (i = 0; i < this->leafsNum; i++) {
+               node = &this->leafs[i];
+               node->idx = i;
+               node->leaf = LEAF;
+       }
+}
+MSAGuideTree::~MSAGuideTree() {
+       //release tree nodes
+       delete[] this->nodes;
+
+       //release alignment orders
+       releaseAlignmentOrders();
+
+}
+//get the tree nodes
+TreeNode* MSAGuideTree::getNodes() {
+       return nodes;
+}
+//get the leaf nodes
+TreeNode* MSAGuideTree::getLeafs() {
+       return leafs;
+}
+//get the number of nodes;
+int MSAGuideTree::getNodesNum() {
+       return nodesNum;
+}
+//get the number of leaf nodes
+int MSAGuideTree::getLeafsNum() {
+       return leafsNum;
+}
+//get the alignment orders
+AlignmentOrder* MSAGuideTree::getAlignOrders() {
+       return alignOrders;
+}
+int MSAGuideTree::getAlignOrdersNum() {
+       return alignOrdersNum;
+}
+/****************************************************
+ create the evolutionary relationship
+ ****************************************************/
+void MSAGuideTree::connectNodes(TreeNode* parent, int parentIdx,
+               TreeNode* leftChild, float leftDist, TreeNode* rightChild,
+               float rightDist) {
+       //save the parents index for each child
+       leftChild->parent = parent;
+       leftChild->parentIdx = parentIdx;
+       rightChild->parent = parent;
+       rightChild->parentIdx = parentIdx;
+
+       //save the branch lengths (i.e. distance) from each child to its parent
+       leftChild->dist = leftDist;
+       rightChild->dist = rightDist;
+
+       //save the indices of itself and its children for this new tree node
+       parent->idx = parentIdx;
+       parent->left = leftChild;
+       parent->leftIdx = leftChild->idx;
+       parent->right = rightChild;
+       parent->rightIdx = rightChild->idx;
+}
+/*****************************************
+ compute the alignment order of the phylogentic tree
+ *****************************************/
+void MSAGuideTree::createAlignmentOrders() {
+       int i;
+
+       AlignmentOrder* order;
+       //allocate memory space for alignment orders vector
+       this->alignOrdersNum = 0;//for alignment orders, it starts from 1 instead of 0
+       this->alignOrdersSize = numSeqs;//the number of internal nodes of the phylogentic tree + 1
+       this->alignOrders = new AlignmentOrder[this->alignOrdersSize];
+       if (!this->alignOrders) {
+               cerr << "OOPS: Alignment orders memory allocation failed" << endl;
+               exit(-1);
+       }
+       //initialize the alignment orders vector
+       for (i = 0; i < this->alignOrdersSize; i++) {
+               order = &this->alignOrders[i];
+               order->leftOrder = 0;
+               order->rightOrder = 0;
+               order->leftLeafs = 0;
+               order->leftNum = 0;
+               order->rightLeafs = 0;
+               order->rightNum = 0;
+       }
+       //starting out constructing the alignment orders
+       int subLeafsNum;
+       int nodeDepth = 1;
+       int subOrder = recursiveCreateAlignmentOrders(this->root, 0, subLeafsNum,
+                       nodeDepth);
+
+       //check whether the function works well
+       if (subLeafsNum != numSeqs || this->alignOrdersNum != subOrder) {
+               fprintf(stderr,
+                               "The alignment orders constructed were wrong (subLeafsNum %d, alignOrdersNum %d, subOrder %d)\n",
+                               subLeafsNum, alignOrdersNum, subOrder);
+       }
+
+}
+int MSAGuideTree::recursiveCreateAlignmentOrders(TreeNode* subRoot,
+               int* subLeafs, int& subLeafsNum, int nodeDepth) {
+       int leftNum, rightNum;
+       int leftOrder, rightOrder;
+       int* leftLeafs, *rightLeafs;
+
+       if (subRoot->leaf == LEAF) {
+               subLeafs[0] = subRoot->idx;
+               subLeafsNum = 1;
+
+               return 0;                       //if it is a leaf, return the index 0
+       }
+       leftOrder = rightOrder = 0;
+       leftNum = rightNum = 0;
+       leftLeafs = new int[numSeqs];
+       rightLeafs = new int[numSeqs];
+
+       //check the left subtree
+       if (subRoot->left) {
+               //recursively tranverse the left subtree
+               leftOrder = recursiveCreateAlignmentOrders(subRoot->left, leftLeafs,
+                               leftNum, nodeDepth + 1);
+       }
+       //check the right subtree
+       if (subRoot->right) {
+               rightOrder = recursiveCreateAlignmentOrders(subRoot->right, rightLeafs,
+                               rightNum, nodeDepth + 1);
+       }
+       //save the leafs in the left and right subtrees of the current subtree
+       if (this->alignOrdersNum > this->alignOrdersSize) {
+               fprintf(stderr, "the alignment order function works bad\n");\
+               exit(-1);
+       }
+
+       AlignmentOrder* order = &this->alignOrders[++this->alignOrdersNum];
+       order->nodeDepth = nodeDepth;
+       order->leftOrder = leftOrder;
+       order->rightOrder = rightOrder;
+       order->leftNum = leftNum;
+       order->rightNum = rightNum;
+       order->leftLeafs = new int[order->leftNum];
+       order->rightLeafs = new int[order->rightNum];
+       if (!order->leftLeafs || !order->rightLeafs) {
+               fprintf(stderr,
+                               "memory allocation failed while recursively constructing alignment orders\n");
+               exit(-1);
+       }
+       memcpy(order->leftLeafs, leftLeafs, order->leftNum * sizeof(int));
+       memcpy(order->rightLeafs, rightLeafs, order->rightNum * sizeof(int));
+
+       delete[] leftLeafs;
+       delete[] rightLeafs;
+
+       //for the root of the tree, subLeafs buffer is set to 0
+       if (subLeafs) {
+               //copy the results to the parent tree node
+               memcpy(subLeafs, order->leftLeafs, order->leftNum * sizeof(int));
+               memcpy(subLeafs + order->leftNum, order->rightLeafs,
+                               order->rightNum * sizeof(int));
+       }
+       //compute the total number of leafs in this subtree
+       subLeafsNum = order->leftNum + order->rightNum;
+
+       return this->alignOrdersNum;//return the index of itself, starting from 1, instead of 0
+}
+void MSAGuideTree::releaseAlignmentOrders() {
+       if (!this->alignOrders) {
+               return;
+       }
+       for (int i = 0; i < this->alignOrdersNum; i++) {
+               AlignmentOrder* order = &this->alignOrders[i];
+               if (order->leftLeafs) {
+                       delete[] order->leftLeafs;
+               }
+               if (order->rightLeafs) {
+                       delete[] order->rightLeafs;
+               }
+       }
+       delete[] alignOrders;
+}
+/********************************
+ display the alignment orders
+ ********************************/
+void MSAGuideTree::displayAlignmentOrders() {
+       int i, j;
+       AlignmentOrder* order;
+       fprintf(stderr, "************DISPLAY ALIGNMENT ORDER***************\n");
+       for (i = 1; i <= this->alignOrdersNum; i++) {
+               order = &this->alignOrders[i];
+
+               fprintf(stderr, "GROUP (%d depth %d):\n---LEFT ORDER: %d\n", i,
+                               order->nodeDepth, order->leftOrder);
+               fprintf(stderr, "---LEFT: ");
+               for (j = 0; j < order->leftNum; j++) {
+                       fprintf(stderr, "%d ", order->leftLeafs[j]);
+               }
+
+               fprintf(stderr, "\n---RIGHT ORDER: %d\n", order->rightOrder);
+               fprintf(stderr, "\n---RIGHT: ");
+               for (j = 0; j < order->rightNum; j++) {
+                       fprintf(stderr, "%d ", order->rightLeafs[j]);
+               }
+               fprintf(stderr, "\n");
+       }
+       fprintf(stderr, "*******************************************\n");
+}
+/*********************************
+ display the tree
+ *********************************/
+void MSAGuideTree::displayTree() {
+       fprintf(stderr, "**************DISPLAY TREE*********************\n");
+       for (int i = 0; i < nodesNum; i++) {
+               TreeNode* node = &nodes[i];
+
+               fprintf(stderr,
+                               "%d(%p): left(%p) %d, right(%p) %d, parent(%p) %d, dist %f\n",
+                               (node == &nodes[node->idx]) ? node->idx : -2, node, node->left,
+                               (!node->left || node->left == &nodes[node->leftIdx]) ?
+                                               node->leftIdx : -2, node->right,
+                               (!node->right || node->right == &nodes[node->rightIdx]) ?
+                                               node->rightIdx : -2, node->parent,
+                               (!node->parent || node->parent == &nodes[node->parentIdx]) ?
+                                               node->parentIdx : -2, node->dist);
+       }
+       fprintf(stderr, "*******************************************\n");
+}
+/*********************************
+ compute the sequence weights
+ *********************************/
+void MSAGuideTree::getSeqsWeights() {
+       int i;
+       TreeNode* curr;
+
+       //compute the order of each node, which represents the number of leaf nodes in the substree rooting from it.
+       for (i = 0; i < leafsNum; i++) {
+               //for each leaf nodes
+               curr = &this->leafs[i];
+               while (curr != 0) {
+                       curr->order++;
+
+                       curr = curr->parent;
+               }
+       }
+       //compute the weight of each sequence, which corresponds to a leaf node
+       for (i = 0; i < numSeqs; i++) {
+               //compute the weight of each sequence
+               float weights = 0;
+               curr = &this->leafs[i];
+               while (curr->parent != 0) {
+                       weights += curr->dist / curr->order;
+                       curr = curr->parent;
+                       //printf("order:%d weights: %f\n", curr->order, weights);
+               }
+               //save the weight of this sequence
+               seqsWeights[i] = (int) (100 * weights);
+               //printf("%d\n", seqsWeights[i]);
+       }
+       //normalize the weights 
+       int wsum = 0;
+       for (i = 0; i < numSeqs; i++) {
+               wsum += seqsWeights[i];
+       }
+       if (wsum == 0) {
+               //in this case, every sequence is assumed to have an identical weight
+               for (i = 0; i < numSeqs; i++) {
+                       seqsWeights[i] = 1;
+               }
+               wsum = numSeqs;
+       }
+       //printf("wsum:%d \n", wsum);
+       for (i = 0; i < numSeqs; i++) {
+               seqsWeights[i] = (seqsWeights[i] * INT_MULTIPLY) / wsum;
+               if (seqsWeights[i] < 1) { 
+                       seqsWeights[i] = 1;
+               }
+               //printf("%d \n", seqsWeights[i]);
+       }
+}
+void MSAGuideTree::create() {
+       //do nothing
+}
+