/*********************************************** * # Copyright 2009-2010. Liu Yongchao * # Contact: Liu Yongchao, School of Computer Engineering, * # Nanyang Technological University. * # Emails: liuy0039@ntu.edu.sg; nkcslyc@hotmail.com * # * # GPL version 3.0 applies. * # * ************************************************/ #include "MSAClusterTree.h" MSAClusterTree::MSAClusterTree(MSA* msa, VVF& distMatrix, int numSeqs) : MSAGuideTree(msa, distMatrix, numSeqs) { } MSAClusterTree::~MSAClusterTree() { } void MSAClusterTree::create() { //generate the neighbor-joining tree this->generateClusterTree(); //calculate sequence weights this->getSeqsWeights(); //construct the alignment orders this->createAlignmentOrders(); } void MSAClusterTree::generateClusterTree() { int i; ValidNode* validNodes, *headValidNodes; ValidNode* miniPtr, *minjPtr, *ivalid, *jvalid; int mini, minj; float* joins; unsigned int* clusterLeafs; //initialize the valid nodes link list validNodes = new ValidNode[leafsNum + 1]; joins = new float[leafsNum + 1]; clusterLeafs = new unsigned int[nodesNum + 1]; if (!validNodes || !joins || !clusterLeafs) { cerr << "Out of memory of the reconstruction of cluster tree" << endl; } //initialize cluster size for (i = 0; i < this->leafsNum; i++) { clusterLeafs[i] = 1; } headValidNodes = &validNodes[0]; headValidNodes->next = &validNodes[1]; headValidNodes->n = -1; headValidNodes->node = -1; headValidNodes->prev = NULL; //build an initial link list ValidNode* curr = &validNodes[1]; ValidNode* prev = headValidNodes; ValidNode* next = &validNodes[2]; for (i = 0; i < leafsNum; i++) { curr->n = i; curr->node = i; curr->prev = prev; curr->next = next; prev = curr; curr = next; next++; } prev->next = NULL; //to generate the cluster tree int nodeIdx; //the index of an internal node int firstNode = leafsNum; //the index of the first internal node int lastNode = firstNode + leafsNum - 1;//the index of the last internal node for (nodeIdx = firstNode; nodeIdx < lastNode; nodeIdx++) { //find closest pair of clusters float minDist = 1.1f; miniPtr = headValidNodes; minjPtr = headValidNodes; for (ivalid = headValidNodes->next; ivalid != NULL; ivalid = ivalid->next) { mini = ivalid->n; for (jvalid = headValidNodes->next; jvalid != NULL && jvalid->n < mini; jvalid = jvalid->next) { minj = jvalid->n; float dist = (*distMatrix)[mini][minj]; if (dist < 0) { cerr << "ERROR: It is impossible to have distance value less than zero" << endl; dist = 0; } if (dist < minDist) { minDist = dist; miniPtr = ivalid; minjPtr = jvalid; } //printf("dist %g mini %d minj %d\n", dist, ivalid->node, jvalid->node); } } //printf("**** mini %d minj %d minDist %g *****\n", miniPtr->node, minjPtr->node, minDist); //check the validity of miniPtr and minjPtr; if (miniPtr == headValidNodes || minjPtr == headValidNodes) { cerr << "OOPS: Error occurred while constructing the cluster tree\n" << endl; exit(-1); } //computing branch length and join the two nodes float branchLength = minDist * 0.5f; this->connectNodes(&nodes[nodeIdx], nodeIdx, &nodes[miniPtr->node], branchLength, &nodes[minjPtr->node], branchLength); clusterLeafs[nodeIdx] = clusterLeafs[miniPtr->node] + clusterLeafs[minjPtr->node]; //remove the valid node minjPtr from the list minjPtr->prev->next = minjPtr->next; if (minjPtr->next != NULL) { minjPtr->next->prev = minjPtr->prev; } minjPtr->prev = minjPtr->next = NULL; //compute the distance of each remaining valid node to the new node for (ivalid = headValidNodes->next; ivalid != NULL; ivalid = ivalid->next) { int idx = ivalid->n; float idist = (*distMatrix)[miniPtr->n][idx]; float jdist = (*distMatrix)[minjPtr->n][idx]; unsigned int isize = clusterLeafs[miniPtr->node]; unsigned int jsize = clusterLeafs[minjPtr->node]; joins[idx] = (idist * isize + jdist * jsize) / (isize + jsize); //joins[idx] = (idist + jdist )/ 2; } //update the distance to the new node miniPtr->node = nodeIdx; mini = miniPtr->n; for (jvalid = headValidNodes->next; jvalid != NULL; jvalid = jvalid->next) { minj = jvalid->n; float dist = joins[minj]; (*distMatrix)[mini][minj] = dist; (*distMatrix)[minj][mini] = dist; } } //add a pseudo root to this unrooted NJ tree this->root = &nodes[lastNode - 1]; delete[] validNodes; delete[] joins; delete[] clusterLeafs; }