1 /***********************************************
2 * # Copyright 2009-2010. Liu Yongchao
3 * # Contact: Liu Yongchao, School of Computer Engineering,
4 * # Nanyang Technological University.
5 * # Emails: liuy0039@ntu.edu.sg; nkcslyc@hotmail.com
7 * # GPL version 3.0 applies.
9 * ************************************************/
11 #include "MSAClusterTree.h"
12 MSAClusterTree::MSAClusterTree(MSA* msa, VVF& distMatrix, int numSeqs) :
13 MSAGuideTree(msa, distMatrix, numSeqs) {
15 MSAClusterTree::~MSAClusterTree() {
17 void MSAClusterTree::create() {
18 //generate the neighbor-joining tree
19 this->generateClusterTree();
21 //calculate sequence weights
22 this->getSeqsWeights();
24 //construct the alignment orders
25 this->createAlignmentOrders();
27 void MSAClusterTree::generateClusterTree() {
29 ValidNode* validNodes, *headValidNodes;
30 ValidNode* miniPtr, *minjPtr, *ivalid, *jvalid;
33 unsigned int* clusterLeafs;
35 //initialize the valid nodes link list
36 validNodes = new ValidNode[leafsNum + 1];
37 joins = new float[leafsNum + 1];
38 clusterLeafs = new unsigned int[nodesNum + 1];
39 if (!validNodes || !joins || !clusterLeafs) {
40 cerr << "Out of memory of the reconstruction of cluster tree" << endl;
42 //initialize cluster size
43 for (i = 0; i < this->leafsNum; i++) {
47 headValidNodes = &validNodes[0];
48 headValidNodes->next = &validNodes[1];
49 headValidNodes->n = -1;
50 headValidNodes->node = -1;
51 headValidNodes->prev = NULL;
53 //build an initial link list
54 ValidNode* curr = &validNodes[1];
55 ValidNode* prev = headValidNodes;
56 ValidNode* next = &validNodes[2];
57 for (i = 0; i < leafsNum; i++) {
68 //to generate the cluster tree
69 int nodeIdx; //the index of an internal node
70 int firstNode = leafsNum; //the index of the first internal node
71 int lastNode = firstNode + leafsNum - 1;//the index of the last internal node
73 for (nodeIdx = firstNode; nodeIdx < lastNode; nodeIdx++) {
74 //find closest pair of clusters
76 miniPtr = headValidNodes;
77 minjPtr = headValidNodes;
79 for (ivalid = headValidNodes->next; ivalid != NULL;
80 ivalid = ivalid->next) {
83 for (jvalid = headValidNodes->next;
84 jvalid != NULL && jvalid->n < mini; jvalid = jvalid->next) {
86 float dist = (*distMatrix)[mini][minj];
89 << "ERROR: It is impossible to have distance value less than zero"
98 //printf("dist %g mini %d minj %d\n", dist, ivalid->node, jvalid->node);
101 //printf("**** mini %d minj %d minDist %g *****\n", miniPtr->node, minjPtr->node, minDist);
102 //check the validity of miniPtr and minjPtr;
103 if (miniPtr == headValidNodes || minjPtr == headValidNodes) {
104 cerr << "OOPS: Error occurred while constructing the cluster tree\n"
108 //computing branch length and join the two nodes
109 float branchLength = minDist * 0.5f;
110 this->connectNodes(&nodes[nodeIdx], nodeIdx, &nodes[miniPtr->node],
111 branchLength, &nodes[minjPtr->node], branchLength);
112 clusterLeafs[nodeIdx] = clusterLeafs[miniPtr->node]
113 + clusterLeafs[minjPtr->node];
115 //remove the valid node minjPtr from the list
116 minjPtr->prev->next = minjPtr->next;
117 if (minjPtr->next != NULL) {
118 minjPtr->next->prev = minjPtr->prev;
120 minjPtr->prev = minjPtr->next = NULL;
122 //compute the distance of each remaining valid node to the new node
123 for (ivalid = headValidNodes->next; ivalid != NULL;
124 ivalid = ivalid->next) {
127 float idist = (*distMatrix)[miniPtr->n][idx];
128 float jdist = (*distMatrix)[minjPtr->n][idx];
130 unsigned int isize = clusterLeafs[miniPtr->node];
131 unsigned int jsize = clusterLeafs[minjPtr->node];
132 joins[idx] = (idist * isize + jdist * jsize) / (isize + jsize);
133 //joins[idx] = (idist + jdist )/ 2;
135 //update the distance to the new node
136 miniPtr->node = nodeIdx;
138 for (jvalid = headValidNodes->next; jvalid != NULL;
139 jvalid = jvalid->next) {
142 float dist = joins[minj];
143 (*distMatrix)[mini][minj] = dist;
144 (*distMatrix)[minj][mini] = dist;
147 //add a pseudo root to this unrooted NJ tree
148 this->root = &nodes[lastNode - 1];
152 delete[] clusterLeafs;