--- /dev/null
+/////////////////////////////////////////////////////////////////
+// EvolutionaryTree.hpp
+//
+// Utilities for reading/writing multiple sequence data.
+/////////////////////////////////////////////////////////////////
+
+#ifndef __EVOLUTIONARYTREE_HPP__
+#define __EVOLUTIONARYTREE_HPP__
+
+#include <string>
+#include <list>
+#include <stdio.h>
+#include "SafeVector.h"
+#include "MultiSequence.h"
+#include "Sequence.h"
+#include "Util.hpp"
+
+using namespace std;
+
+
+/////////////////////////////////////////////////////////////////
+// TreeNode
+//
+// The fundamental unit for representing an alignment tree. The
+// guide tree is represented as a binary tree.
+/////////////////////////////////////////////////////////////////
+namespace MXSCARNA {
+class TreeNode {
+ int sequenceLabel; // sequence label
+ float sequenceIdentity; // sequence identity
+ TreeNode *left, *right, *parent; // pointers to left, right children
+ float leftLength, rightLength; // the length of left and right edge
+ /////////////////////////////////////////////////////////////////
+ // TreeNode::PrintNode()
+ //
+ // Internal routine used to print out the sequence comments
+ // associated with the evolutionary tree, using a hierarchical
+ // parenthesized format.
+ /////////////////////////////////////////////////////////////////
+
+ void PrintNode (ostream &outfile, const MultiSequence *sequences) const {
+
+ // if this is a leaf node, print out the associated sequence comment
+ if (sequenceLabel >= 0)
+ //outfile << sequences->GetSequence (sequenceLabel)->GetHeader();
+ outfile << sequences->GetSequence (sequenceLabel)->GetLabel();
+
+ // otherwise, it must have two children; print out their subtrees recursively
+ else {
+ assert (left);
+ assert (right);
+
+ outfile << "(";
+ left->PrintNode (outfile, sequences);
+ outfile << ",";
+ right->PrintNode (outfile, sequences);
+ outfile << ")";
+ }
+ }
+
+ public:
+
+ /////////////////////////////////////////////////////////////////
+ // TreeNode::TreeNode()
+ //
+ // Constructor for a tree node. Note that sequenceLabel = -1
+ // implies that the current node is not a leaf in the tree.
+ /////////////////////////////////////////////////////////////////
+
+ TreeNode (int sequenceLabel) : sequenceLabel (sequenceLabel),
+ left (NULL), right (NULL), parent (NULL) {
+ assert (sequenceLabel >= -1);
+ }
+
+ /////////////////////////////////////////////////////////////////
+ // TreeNode::~TreeNode()
+ //
+ // Destructor for a tree node. Recursively deletes all children.
+ /////////////////////////////////////////////////////////////////
+
+ ~TreeNode (){
+ if (left){ delete left; left = NULL; }
+ if (right){ delete right; right = NULL; }
+ parent = NULL;
+ }
+
+
+ // getters
+ int GetSequenceLabel () const { return sequenceLabel; }
+ TreeNode *GetLeftChild () const { return left; }
+ TreeNode *GetRightChild () const { return right; }
+ TreeNode *GetParent () const { return parent; }
+ float GetIdentity () const { return sequenceIdentity; }
+ float GetLeftLength () const { return leftLength; }
+ float GetRightLength () const { return rightLength; }
+ // setters
+ void SetSequenceLabel (int sequenceLabel){ this->sequenceLabel = sequenceLabel; assert (sequenceLabel >= -1); }
+ void SetLeftChild (TreeNode *left){ this->left = left; }
+ void SetRightChild (TreeNode *right){ this->right = right; }
+ void SetParent (TreeNode *parent){ this->parent = parent; }
+ void SetIdentity (float identity) { this->sequenceIdentity = identity; }
+ void SetLeftLength (float identity) { this->leftLength = identity; }
+ void SetRightLength (float identity) {this->rightLength = identity; }
+ /////////////////////////////////////////////////////////////////
+ // TreeNode::ComputeTree()
+ //
+ // Routine used to compute an evolutionary tree based on the
+ // given distance matrix. We assume the distance matrix has the
+ // form, distMatrix[i][j] = expected accuracy of aligning i with j.
+ /////////////////////////////////////////////////////////////////
+
+ static TreeNode *ComputeTree (const VVF &distMatrix, const VVF &identityMatrix){
+
+ int numSeqs = distMatrix.size(); // number of sequences in distance matrix
+ VVF distances (numSeqs, VF (numSeqs)); // a copy of the distance matrix
+ SafeVector<TreeNode *> nodes (numSeqs, NULL); // list of nodes for each sequence
+ SafeVector<int> valid (numSeqs, 1); // valid[i] tells whether or not the ith
+ // nodes in the distances and nodes array
+ // are valid
+ VVF identities (numSeqs, VF (numSeqs));
+ SafeVector<int> countCluster (numSeqs, 1);
+
+ // initialization: make a copy of the distance matrix
+ for (int i = 0; i < numSeqs; i++) {
+ for (int j = 0; j < numSeqs; j++) {
+ distances[i][j] = distMatrix[i][j];
+ identities[i][j] = identityMatrix[i][j];
+ }
+ }
+
+ // initialization: create all the leaf nodes
+ for (int i = 0; i < numSeqs; i++){
+ nodes[i] = new TreeNode (i);
+ assert (nodes[i]);
+ }
+
+ // repeat until only a single node left
+ for (int numNodesLeft = numSeqs; numNodesLeft > 1; numNodesLeft--){
+ float bestProb = -1;
+ pair<int,int> bestPair;
+
+ // find the closest pair
+ for (int i = 0; i < numSeqs; i++) if (valid[i]){
+ for (int j = i+1; j < numSeqs; j++) if (valid[j]){
+ if (distances[i][j] > bestProb){
+ bestProb = distances[i][j];
+ bestPair = make_pair(i, j);
+ }
+ }
+ }
+
+ // merge the closest pair
+ TreeNode *newParent = new TreeNode (-1);
+ newParent->SetLeftChild (nodes[bestPair.first]);
+ newParent->SetRightChild (nodes[bestPair.second]);
+ nodes[bestPair.first]->SetParent (newParent);
+ nodes[bestPair.second]->SetParent (newParent);
+ nodes[bestPair.first] = newParent;
+ nodes[bestPair.second] = NULL;
+ newParent->SetIdentity(identities[bestPair.first][bestPair.second]);
+
+
+ // now update the distance matrix
+ for (int i = 0; i < numSeqs; i++) if (valid[i]){
+ distances[bestPair.first][i] = distances[i][bestPair.first]
+ = (distances[i][bestPair.first]*countCluster[bestPair.first]
+ + distances[i][bestPair.second]*countCluster[bestPair.second])
+ / (countCluster[bestPair.first] + countCluster[bestPair.second]);
+// distances[bestPair.first][i] = distances[i][bestPair.first]
+// = (distances[i][bestPair.first] + distances[i][bestPair.second]) * bestProb / 2;
+ identities[bestPair.first][i] = identities[i][bestPair.first]
+ = (identities[i][bestPair.first]*countCluster[bestPair.first]
+ + identities[i][bestPair.second]*countCluster[bestPair.second])
+ / (countCluster[bestPair.first] + countCluster[bestPair.second]);
+ }
+
+ // finally, mark the second node entry as no longer valid
+ countCluster[bestPair.first] += countCluster[bestPair.second];
+ valid[bestPair.second] = 0;
+ }
+
+ assert (nodes[0]);
+ return nodes[0];
+ }
+
+ /////////////////////////////////////////////////////////////////
+ // TreeNode::Print()
+ //
+ // Print out the subtree associated with this node in a
+ // parenthesized representation.
+ /////////////////////////////////////////////////////////////////
+
+ void Print (ostream &outfile, const MultiSequence *sequences) const {
+// outfile << "Alignment tree: ";
+ PrintNode (outfile, sequences);
+ outfile << endl;
+ }
+};
+}
+#endif //__EVOLUTIONARYTREE_HPP__