Add GLprobs and MSAprobs to binaries
[jabaws.git] / binaries / src / MSAProbs-0.9.7 / MSAProbs / MSAClusterTree.cpp
1 /***********************************************
2  * # Copyright 2009-2010. Liu Yongchao
3  * # Contact: Liu Yongchao, School of Computer Engineering,
4  * #                     Nanyang Technological University.
5  * # Emails:     liuy0039@ntu.edu.sg; nkcslyc@hotmail.com
6  * #
7  * # GPL version 3.0 applies.
8  * #
9  * ************************************************/
10
11 #include "MSAClusterTree.h"
12 MSAClusterTree::MSAClusterTree(MSA* msa, VVF& distMatrix, int numSeqs) :
13                 MSAGuideTree(msa, distMatrix, numSeqs) {
14 }
15 MSAClusterTree::~MSAClusterTree() {
16 }
17 void MSAClusterTree::create() {
18         //generate the neighbor-joining tree
19         this->generateClusterTree();
20
21         //calculate sequence weights
22         this->getSeqsWeights();
23
24         //construct the alignment orders
25         this->createAlignmentOrders();
26 }
27 void MSAClusterTree::generateClusterTree() {
28         int i;
29         ValidNode* validNodes, *headValidNodes;
30         ValidNode* miniPtr, *minjPtr, *ivalid, *jvalid;
31         int mini, minj;
32         float* joins;
33         unsigned int* clusterLeafs;
34
35         //initialize the valid nodes link list
36         validNodes = new ValidNode[leafsNum + 1];
37         joins = new float[leafsNum + 1];
38         clusterLeafs = new unsigned int[nodesNum + 1];
39         if (!validNodes || !joins || !clusterLeafs) {
40                 cerr << "Out of memory of the reconstruction of cluster tree" << endl;
41         }
42         //initialize cluster size 
43         for (i = 0; i < this->leafsNum; i++) {
44                 clusterLeafs[i] = 1;
45         }
46
47         headValidNodes = &validNodes[0];
48         headValidNodes->next = &validNodes[1];
49         headValidNodes->n = -1;
50         headValidNodes->node = -1;
51         headValidNodes->prev = NULL;
52
53         //build an initial link list
54         ValidNode* curr = &validNodes[1];
55         ValidNode* prev = headValidNodes;
56         ValidNode* next = &validNodes[2];
57         for (i = 0; i < leafsNum; i++) {
58                 curr->n = i;
59                 curr->node = i;
60                 curr->prev = prev;
61                 curr->next = next;
62                 prev = curr;
63                 curr = next;
64                 next++;
65         }
66         prev->next = NULL;
67
68         //to generate the cluster tree
69         int nodeIdx;    //the index of an internal node
70         int firstNode = leafsNum;       //the index of the first internal node
71         int lastNode = firstNode + leafsNum - 1;//the index of the last internal node
72
73         for (nodeIdx = firstNode; nodeIdx < lastNode; nodeIdx++) {
74                 //find closest pair of clusters
75                 float minDist = 2.0f;
76                 miniPtr = headValidNodes;
77                 minjPtr = headValidNodes;
78
79                 for (ivalid = headValidNodes->next; ivalid != NULL;
80                                 ivalid = ivalid->next) {
81                         mini = ivalid->n;
82                         for (jvalid = headValidNodes->next;
83                                         jvalid != NULL && jvalid->n < mini; jvalid = jvalid->next) {
84                                 minj = jvalid->n;
85                                 float dist = (*distMatrix)[mini][minj];
86                                 if (dist < 0) {
87                                         cerr
88                                                         << "ERROR: It is impossible to have distance value less than zero"
89                                                         << endl;
90                                         dist = 0;
91                                 }
92                                 if (dist < minDist) {
93                                         minDist = dist;
94                                         miniPtr = ivalid;
95                                         minjPtr = jvalid;
96                                 }
97                                 //printf("dist %g mini %d minj %d\n", dist, ivalid->node, jvalid->node);
98                         }
99                 }
100                 //printf("**** mini %d minj %d minDist %g *****\n", miniPtr->node, minjPtr->node, minDist);
101                 //check the validity of miniPtr and minjPtr;
102                 if (miniPtr == headValidNodes || minjPtr == headValidNodes) {
103                         cerr << "OOPS: Error occurred while constructing the cluster tree\n"
104                                         << endl;
105                         exit(-1);
106                 }
107                 //computing branch length and join the two nodes
108                 float branchLength = minDist * 0.5f;
109                 this->connectNodes(&nodes[nodeIdx], nodeIdx, &nodes[miniPtr->node],
110                                 branchLength, &nodes[minjPtr->node], branchLength);
111                 clusterLeafs[nodeIdx] = clusterLeafs[miniPtr->node]
112                                 + clusterLeafs[minjPtr->node];
113
114                 //remove the valid node minjPtr from the list
115                 minjPtr->prev->next = minjPtr->next;
116                 if (minjPtr->next != NULL) {
117                         minjPtr->next->prev = minjPtr->prev;
118                 }
119                 minjPtr->prev = minjPtr->next = NULL;
120
121                 //compute the distance of each remaining valid node to the new node
122                 for (ivalid = headValidNodes->next; ivalid != NULL;
123                                 ivalid = ivalid->next) {
124                         int idx = ivalid->n;
125
126                         float idist = (*distMatrix)[miniPtr->n][idx];
127                         float jdist = (*distMatrix)[minjPtr->n][idx];
128
129                         unsigned int isize = clusterLeafs[miniPtr->node];
130                         unsigned int jsize = clusterLeafs[minjPtr->node];
131                         joins[idx] = (idist * isize + jdist * jsize) / (isize + jsize);
132                 }
133                 //update the distance to the new node
134                 miniPtr->node = nodeIdx;
135                 mini = miniPtr->n;
136                 for (jvalid = headValidNodes->next; jvalid != NULL;
137                                 jvalid = jvalid->next) {
138                         minj = jvalid->n;
139
140                         float dist = joins[minj];
141                         (*distMatrix)[mini][minj] = dist;
142                         (*distMatrix)[minj][mini] = dist;
143                 }
144         }
145         //add a pseudo root to this unrooted NJ tree
146         this->root = &nodes[lastNode - 1];
147
148         delete[] validNodes;
149         delete[] joins;
150         delete[] clusterLeafs;
151 }