1 /////////////////////////////////////////////////////////////////
4 // Utilities for reading/writing multiple sequence data.
5 /////////////////////////////////////////////////////////////////
7 #ifndef EVOLUTIONARYTREE_H
8 #define EVOLUTIONARYTREE_H
13 #include "SafeVector.h"
14 #include "MultiSequence.h"
19 /////////////////////////////////////////////////////////////////
22 // The fundamental unit for representing an alignment tree. The
23 // guide tree is represented as a binary tree.
24 /////////////////////////////////////////////////////////////////
27 int sequenceLabel; // sequence label
28 TreeNode *left, *right, *parent; // pointers to left, right children
30 /////////////////////////////////////////////////////////////////
31 // TreeNode::PrintNode()
33 // Internal routine used to print out the sequence comments
34 // associated with the evolutionary tree, using a hierarchical
35 // parenthesized format.
36 /////////////////////////////////////////////////////////////////
38 void PrintNode (ostream &outfile, const MultiSequence *sequences) const {
40 // if this is a leaf node, print out the associated sequence comment
41 if (sequenceLabel >= 0)
42 outfile << sequences->GetSequence (sequenceLabel)->GetHeader();
44 // otherwise, it must have two children; print out their subtrees recursively
50 left->PrintNode (outfile, sequences);
52 right->PrintNode (outfile, sequences);
59 /////////////////////////////////////////////////////////////////
60 // TreeNode::TreeNode()
62 // Constructor for a tree node. Note that sequenceLabel = -1
63 // implies that the current node is not a leaf in the tree.
64 /////////////////////////////////////////////////////////////////
66 TreeNode (int sequenceLabel) : sequenceLabel (sequenceLabel),
67 left (NULL), right (NULL), parent (NULL) {
68 assert (sequenceLabel >= -1);
71 /////////////////////////////////////////////////////////////////
72 // TreeNode::~TreeNode()
74 // Destructor for a tree node. Recursively deletes all children.
75 /////////////////////////////////////////////////////////////////
78 if (left){ delete left; left = NULL; }
79 if (right){ delete right; right = NULL; }
85 int GetSequenceLabel () const { return sequenceLabel; }
86 TreeNode *GetLeftChild () const { return left; }
87 TreeNode *GetRightChild () const { return right; }
88 TreeNode *GetParent () const { return parent; }
91 void SetSequenceLabel (int sequenceLabel){ this->sequenceLabel = sequenceLabel; assert (sequenceLabel >= -1); }
92 void SetLeftChild (TreeNode *left){ this->left = left; }
93 void SetRightChild (TreeNode *right){ this->right = right; }
94 void SetParent (TreeNode *parent){ this->parent = parent; }
96 /////////////////////////////////////////////////////////////////
97 // TreeNode::ComputeTree()
99 // Routine used to compute an evolutionary tree based on the
100 // given distance matrix. We assume the distance matrix has the
101 // form, distMatrix[i][j] = expected accuracy of aligning i with j.
102 /////////////////////////////////////////////////////////////////
104 static TreeNode *ComputeTree (const VVF &distMatrix){
106 int numSeqs = distMatrix.size(); // number of sequences in distance matrix
107 VVF distances (numSeqs, VF (numSeqs)); // a copy of the distance matrix
108 SafeVector<TreeNode *> nodes (numSeqs, NULL); // list of nodes for each sequence
109 SafeVector<int> valid (numSeqs, 1); // valid[i] tells whether or not the ith
110 // nodes in the distances and nodes array
113 // initialization: make a copy of the distance matrix
114 for (int i = 0; i < numSeqs; i++)
115 for (int j = 0; j < numSeqs; j++)
116 distances[i][j] = distMatrix[i][j];
118 // initialization: create all the leaf nodes
119 for (int i = 0; i < numSeqs; i++){
120 nodes[i] = new TreeNode (i);
124 // repeat until only a single node left
125 for (int numNodesLeft = numSeqs; numNodesLeft > 1; numNodesLeft--){
127 pair<int,int> bestPair;
129 // find the closest pair
130 for (int i = 0; i < numSeqs; i++) if (valid[i]){
131 for (int j = i+1; j < numSeqs; j++) if (valid[j]){
132 if (distances[i][j] > bestProb){
133 bestProb = distances[i][j];
134 bestPair = make_pair(i, j);
139 // merge the closest pair
140 TreeNode *newParent = new TreeNode (-1);
141 newParent->SetLeftChild (nodes[bestPair.first]);
142 newParent->SetRightChild (nodes[bestPair.second]);
143 nodes[bestPair.first]->SetParent (newParent);
144 nodes[bestPair.second]->SetParent (newParent);
145 nodes[bestPair.first] = newParent;
146 nodes[bestPair.second] = NULL;
148 // now update the distance matrix
149 for (int i = 0; i < numSeqs; i++) if (valid[i]){
150 distances[bestPair.first][i] = distances[i][bestPair.first]
151 = (distances[i][bestPair.first] + distances[i][bestPair.second]) * bestProb / 2;
154 // finally, mark the second node entry as no longer valid
155 valid[bestPair.second] = 0;
162 /////////////////////////////////////////////////////////////////
165 // Print out the subtree associated with this node in a
166 // parenthesized representation.
167 /////////////////////////////////////////////////////////////////
169 void Print (ostream &outfile, const MultiSequence *sequences) const {
170 outfile << "Alignment tree: ";
171 PrintNode (outfile, sequences);