Mac binaries
[jabaws.git] / website / archive / binaries / mac / src / clustalw / src / tree / Tree.cpp
diff --git a/website/archive/binaries/mac/src/clustalw/src/tree/Tree.cpp b/website/archive/binaries/mac/src/clustalw/src/tree/Tree.cpp
new file mode 100644 (file)
index 0000000..48ffa4b
--- /dev/null
@@ -0,0 +1,1270 @@
+/**
+ * Author: Mark Larkin
+ * 
+ * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson.  
+ */
+#ifdef HAVE_CONFIG_H
+    #include "config.h"
+#endif
+#include <sstream>
+#include "Tree.h"
+
+namespace clustalw
+{
+
+/**
+ * 
+ * @param firstSeq 
+ * @param lastSeq 
+ * @param sweight 
+ */
+void Tree::calcSeqWeights(int firstSeq, int lastSeq, vector<int>* sweight)
+{
+    if((int)sweight->size() < lastSeq - 1)
+    {
+        sweight->resize(lastSeq - 1);
+    }
+    
+    int i, _nSeqs;
+    int temp, sum;
+    int *weight;
+    /*
+     * If there are more than three sequences....
+     */
+    _nSeqs = lastSeq - firstSeq;
+    if ((_nSeqs >= 2) && (clustalw::userParameters->getDistanceTree() == true) && 
+        (clustalw::userParameters->getNoWeights() == false))
+    {
+        /*
+         * Calculate sequence weights based on Phylip tree.
+         */
+
+        weight = new int[lastSeq + 1];
+        
+        for (i = firstSeq; i < lastSeq; i++)
+        {
+            weight[i] = calcWeight(i);
+        }
+
+        /*
+         * Normalise the weights, such that the sum of the weights = INT_SCALE_FACTOR
+         */
+
+        sum = 0;
+        for (i = firstSeq; i < lastSeq; i++)
+        {
+            sum += weight[i];
+        }
+
+        if (sum == 0)
+        {
+            for (i = firstSeq; i < lastSeq; i++)
+            {
+                weight[i] = 1;
+            }
+            sum = i;
+        }
+
+        for (i = firstSeq; i < lastSeq; i++)
+        {
+            (*sweight)[i] = (weight[i] * INT_SCALE_FACTOR) / sum;
+            if ((*sweight)[i] < 1)
+            {
+                (*sweight)[i] = 1;
+            }
+        }
+        
+        delete []weight;
+        weight = NULL;
+    }
+
+    else
+    {
+        /*
+         * Otherwise, use identity weights.
+         */
+        temp = INT_SCALE_FACTOR / _nSeqs;
+        // AW 2009-05-28: goes wrong if we have more than
+        // INT_SCALE_FACTOR seqs. if so, set to 1, just as above
+        if (temp < 1)
+            temp = 1;
+
+        for (i = firstSeq; i < lastSeq; i++)
+        {
+            (*sweight)[i] = temp;
+        }
+    }
+
+}
+
+/**
+ * 
+ * @param alignPtr 
+ * @param treeFileName 
+ * @param firstSeq 
+ * @param lastSeq 
+ * @return 
+ */
+int Tree::readTree(clustalw::Alignment* alignPtr, const string& treeFileName, int firstSeq, int lastSeq)
+{
+
+    if(alignPtr == NULL || firstSeq < 0 || lastSeq < 1)
+    {
+        return 0;
+    }
+    char c;
+    string name1 = "", name2 = "";
+    int i, j, k;
+    bool found;
+
+    numSeq = 0;
+    nnodes = 0;
+    ntotal = 0;
+    rootedTree = true;
+
+    // Need to check what happens if I try to open a file that doesnt exist!
+    if(treeFileName.empty())
+    {
+        return 0; // Cannot open, empty string!
+    }
+    else
+    {
+        file.open(treeFileName.c_str(), ifstream::in);
+        if(!file.is_open())
+        {
+            clustalw::utilityObject->error("Cannot open output file [%s]\n", treeFileName.c_str());
+        }
+    }
+
+    skipSpace(&file);
+    charFromFile = file.get();
+    
+    if (charFromFile != '(')
+    {
+        clustalw::utilityObject->error("Wrong format in tree file %s\n", treeFileName.c_str());
+        return 0;
+    }
+    file.seekg(0, ios::beg); // Put get pointer back to the begining!
+
+    clustalw::userParameters->setDistanceTree(true);
+
+    
+    // Allocate memory for tree
+    
+    // Allocate memory.
+    nptr = new TreeNode*[3 * (lastSeq - firstSeq + 1)];
+    ptrs = new TreeNode*[3 * (lastSeq - firstSeq + 1)];
+    lptr = new TreeNode*[lastSeq - firstSeq + 1];
+    olptr = new TreeNode*[lastSeq + 1];
+    
+    seqTree = avail();
+    setInfo(seqTree, NULL, 0, string(""), 0.0);
+
+    createTree(seqTree, NULL, &file);
+    file.close();
+
+    if (numSeq != lastSeq - firstSeq)
+    {
+        stringstream ss;
+        ss << "tree not compatible with alignment\n(" << lastSeq - firstSeq 
+        << " sequences in alignment and "<< numSeq <<" in tree\n";
+        string errorMes;
+        ss >> errorMes;
+        clustalw::utilityObject->error(errorMes.c_str());
+        return 0;
+    }
+   
+    // If the tree is unrooted, reroot the tree - ie. minimise the difference
+    // between the mean root->leaf distances for the left and right branches of
+    // the tree.     
+
+    if (clustalw::userParameters->getDistanceTree() == false)
+    {
+        if (rootedTree == false)
+        {
+            clustalw::utilityObject->error("input tree is unrooted and has no distances.\nCannot align sequences");
+            return 0;
+        }
+    }
+
+    if (rootedTree == false)
+    {
+        root = reRoot(seqTree, lastSeq - firstSeq + 1);
+    }
+    else
+    {
+        root = seqTree;
+    }
+   
+    // calculate the 'order' of each node.
+    int nameLength;
+    string nameSeq;
+    orderNodes();
+
+    if (numSeq >= 2)
+    {
+        // If there are more than three sequences....
+        // assign the sequence nodes (in the same order as in the alignment file)
+        for (i = firstSeq; i < lastSeq; i++)
+        {
+            nameLength = alignPtr->getName(i + 1).length();
+            nameSeq = alignPtr->getName(i + 1);
+            name1 = "";
+            name2 = "";
+            if (nameLength > clustalw::MAXNAMES)
+            {
+                stringstream ss;
+                ss << "name " << nameSeq << " is too long for PHYLIP tree format (max " 
+                   << clustalw::MAXNAMES << " chars)";
+                string msg; 
+                ss >> msg; 
+                clustalw::utilityObject->warning(msg.c_str());// Mark change 17-5-07
+            }
+
+            for (k = 0; k < nameLength && k < clustalw::MAXNAMES; k++)
+            {
+                c = nameSeq[k];
+                if ((c > 0x40) && (c < 0x5b)) 
+                {
+                    c = c | 0x20;
+                }
+                if (c == ' ')
+                {
+                    c = '_';
+                }
+                name2 += c;
+            }
+            found = false;
+
+            for (j = 0; j < numSeq; j++)
+            {
+                name1 = "";
+                for (k = 0; k < (int)lptr[j]->name.length() && k < clustalw::MAXNAMES; k++)
+                {
+                    c = lptr[j]->name[k];
+                    if ((c > 0x40) && (c < 0x5b))
+                    {
+                        c = c | 0x20;
+                    }
+
+                    name1 += c;
+                }
+
+                if (name1.compare(name2) == 0)
+                {
+                    olptr[i] = lptr[j];
+                    found = true;
+                }
+            }
+
+            if (found == false)
+            {
+                utilityObject->error("tree not compatible with alignment:\n %s not found\n", name2.c_str());
+                return 0;
+            }
+        }
+
+    }
+    return (1);
+}
+
+/**
+ * 
+ * @param firstSeq 
+ * @param lastSeq 
+ * @return 
+ */
+auto_ptr<AlignmentSteps> Tree::createSets(int firstSeq, int lastSeq)
+{
+    auto_ptr<AlignmentSteps> progAlignSteps;
+    progAlignSteps.reset(new AlignmentSteps);
+    
+    int i, j, _nSeqs;
+
+    numSets = 0;
+    _nSeqs = lastSeq - firstSeq;
+    if (_nSeqs >= 2)
+    {
+        // If there are more than three sequences....
+        groups = new int[_nSeqs + 1];
+        groupSeqs(root, groups, _nSeqs, progAlignSteps.get());
+
+        delete []groups;
+        groups  = NULL;
+
+    }
+
+    else
+    {
+        groups = new int[_nSeqs + 1];
+        for (i = 0; i < _nSeqs - 1; i++)
+        {
+            for (j = 0; j < _nSeqs; j++)
+                if (j <= i)
+                {
+                    groups[j] = 1;
+                }
+                else if (j == i + 1)
+                {
+                    groups[j] = 2;
+                }
+                else
+                {
+                    groups[j] = 0;
+                }
+
+            progAlignSteps->saveSet(_nSeqs, groups);
+        }
+        delete []groups;
+        groups  = NULL;
+    }    
+    
+    return progAlignSteps;
+}
+
+/**
+ * calcSimilarities changes the distMat.
+ * @param alignPtr 
+ * @param distMat 
+ * @return 
+ */
+int Tree::calcSimilarities(clustalw::Alignment* alignPtr, clustalw::DistMatrix* distMat)
+{
+    int depth = 0, i, j, k, n;
+    bool found;
+    int nerrs, seq1[MAXERRS], seq2[MAXERRS];
+    TreeNode *p,  **pathToRoot;
+    float dist;
+    float *distToNode, badDist[MAXERRS];
+    double **dmat;
+    ostringstream err1;
+    char reply;
+    int nSeqs = alignPtr->getNumSeqs();
+    
+    pathToRoot = new TreeNode*[nSeqs];
+    distToNode = new float[nSeqs];
+    dmat = new double*[nSeqs];
+
+    for (i = 0; i < nSeqs; i++)
+    {
+        dmat[i] = new double[nSeqs];
+    }
+
+    if (nSeqs >= 2)
+    {
+        /*
+         * for each leaf, determine all nodes between the leaf and the root;
+         */
+        for (i = 0; i < nSeqs; i++)
+        {
+            depth = 0;dist = 0.0;
+            p = olptr[i];
+            while (p != NULL)
+            {
+                pathToRoot[depth] = p;
+                dist += p->dist;
+                distToNode[depth] = dist;
+                p = p->parent;
+                depth++;
+            }
+
+            /*
+             * for each pair....
+             */
+            for (j = 0; j < i; j++)
+            {
+                p = olptr[j];
+                dist = 0.0;
+                /*
+                 * find the common ancestor.
+                 */
+                found = false;
+                n = 0;
+                while ((found == false) && (p->parent != NULL))
+                {
+                    for (k = 0; k < depth; k++)
+                    if (p->parent == pathToRoot[k])
+                    {
+                        found = true;
+                        n = k;
+                    }
+
+                    dist += p->dist;
+                    p = p->parent;
+                }
+
+                dmat[i][j] = dist + distToNode[n - 1];
+            }
+        }
+
+        nerrs = 0;
+        for (i = 0; i < nSeqs; i++)
+        {
+            dmat[i][i] = 0.0;
+            for (j = 0; j < i; j++)
+            {
+                if (dmat[i][j] < 0.01)
+                {
+                    dmat[i][j] = 0.01;
+                }
+                if (dmat[i][j] > 1.0)
+                {
+                    if (dmat[i][j] > 1.1 && nerrs < MAXERRS)
+                    {
+                        seq1[nerrs] = i;
+                        seq2[nerrs] = j;
+                        badDist[nerrs] = dmat[i][j];
+                        nerrs++;
+                    }
+                    dmat[i][j] = 1.0;
+                }
+            }
+        }
+        if (nerrs > 0)
+        {
+            string errMess = "The following sequences are too divergent to be aligned:\n";
+            
+            for (i = 0; i < nerrs && i < 5; i++)
+            {
+                err1 << "           " << alignPtr->getName(seq1[i] + 1) << " and " 
+                     << alignPtr->getName(seq2[i] + 1) << " (distance " 
+                     << setprecision(3) << badDist[i] << ")\n"; 
+            }
+            errMess += err1.str();
+            errMess += "(All distances should be between 0.0 and 1.0)\n";
+            errMess += "This may not be fatal but you have been warned!\n";
+            errMess += "SUGGESTION: Remove one or more problem sequences and try again";
+            
+            if (clustalw::userParameters->getInteractive())
+            {
+                reply = clustalw::utilityObject->promptForYesNo(errMess.c_str(), "Continue ");
+            }
+            else
+            {
+                reply = 'y';
+            }
+            if ((reply != 'y') && (reply != 'Y'))
+            {
+                return 0;
+            }
+        }
+    }
+    else
+    {
+        for (i = 0; i < nSeqs; i++)
+        {
+            for (j = 0; j < i; j++)
+            {
+                dmat[i][j] = (*distMat)(i + 1, j + 1);
+            }
+        }
+    }
+    delete []pathToRoot;
+    delete []distToNode;
+    
+    double value;
+    for (i = 0; i < nSeqs; i++)
+    {
+        distMat->SetAt(i + 1, i + 1, 0.0);
+        for (j = 0; j < i; j++)
+        {
+            value = 100.0 - (dmat[i][j]) * 100.0;
+            distMat->SetAt(i + 1, j + 1, value);
+            distMat->SetAt(j + 1, i + 1, value);
+        }
+    }
+
+    for (i = 0; i < nSeqs; i++)
+    {
+        delete [] dmat[i];
+    }
+
+    delete []dmat;
+
+    return 1;
+    
+}
+
+/** *************************************************************************
+ * Private functions!!!!!!!!!!!!!!!                                         *
+ ****************************************************************************/
+/**
+ * 
+ * @param ptree 
+ * @param parent 
+ * @param file 
+ */
+void Tree::createTree(clustalw::TreeNode* ptree, clustalw::TreeNode* parent, ifstream* file)
+{
+    TreeNode* p;
+
+    int i, type;
+    float dist;
+    string name;
+
+    
+    // is this a node or a leaf ?
+    skipSpace(file);
+    charFromFile = file->get();
+    if (charFromFile == '(')
+    {
+        // this must be a node....
+        type = NODE;
+        name = "";
+        ptrs[ntotal] = nptr[nnodes] = ptree;
+        nnodes++;
+        ntotal++;
+
+        createNode(ptree, parent);
+
+        p = ptree->left;
+        createTree(p, ptree, file);
+
+        if (charFromFile == ',')
+        {
+            p = ptree->right;
+            createTree(p, ptree, file);
+            if (charFromFile == ',')
+            {
+                ptree = insertNode(ptree);
+                ptrs[ntotal] = nptr[nnodes] = ptree;
+                nnodes++;
+                ntotal++;
+                p = ptree->right;
+                createTree(p, ptree, file);
+                rootedTree = false;
+            }
+        }
+
+        skipSpace(file);
+        charFromFile = file->get();
+    }
+    
+    // ...otherwise, this is a leaf
+    
+    else
+    {
+        type = LEAF;
+        ptrs[ntotal++] = lptr[numSeq++] = ptree;        
+        // get the sequence name
+        name = "";
+        name += charFromFile;
+        charFromFile = file->get();
+        
+        i = 1;
+        while ((charFromFile != ':') && (charFromFile != ',') && (charFromFile != ')'))
+        {
+            if (i < MAXNAMES)
+            {
+                name += charFromFile;
+                i++;
+            }
+            charFromFile = file->get();
+        }
+
+        if (charFromFile != ':')
+        {
+            clustalw::userParameters->setDistanceTree(false);
+            dist = 0.0;
+        }
+    }
+    
+    // get the distance information
+    
+    dist = 0.0;
+    if (charFromFile == ':')
+    {
+        skipSpace(file);
+        (*file) >> dist;
+        skipSpace(file);
+        charFromFile = file->get();
+    }
+    setInfo(ptree, parent, type, name, dist);
+}
+
+/**
+ * 
+ * @param pptr 
+ * @param parent 
+ */
+void Tree::createNode(TreeNode* pptr, TreeNode* parent)
+{
+    TreeNode* t;
+    pptr->parent = parent;
+    t = avail();
+    pptr->left = t;
+    t = avail();
+    pptr->right = t;
+}
+
+/**
+ * 
+ * @param pptr 
+ * @return 
+ */
+TreeNode* Tree::insertNode(TreeNode* pptr)
+{
+
+    TreeNode* newnode;
+
+    newnode = avail();
+    createNode(newnode, pptr->parent);
+
+    newnode->left = pptr;
+    pptr->parent = newnode;
+
+    setInfo(newnode, pptr->parent, NODE, "", 0.0);
+
+    return newnode;
+}
+
+/**
+ * 
+ * @param p 
+ */
+void Tree::clearTree(clustalw::TreeNode* p)
+{
+    clearTreeNodes(p);
+    delete [] nptr;
+    nptr  = NULL;
+    delete [] ptrs;
+    ptrs  = NULL;
+    delete [] lptr;
+    lptr  = NULL;
+    delete [] olptr;
+    olptr  = NULL;
+}
+
+/**
+ * 
+ * @param p 
+ */
+void Tree::clearTreeNodes(clustalw::TreeNode* p)
+{
+    if (p == NULL)
+    {
+        p = root;
+    }
+    if (p->left != NULL)
+    {
+        clearTreeNodes(p->left);
+    }
+    if (p->right != NULL)
+    {
+        clearTreeNodes(p->right);
+    }
+    p->left = NULL;
+    p->right = NULL;
+    
+    delete p;
+    p  = NULL;
+}
+
+
+
+
+/**
+ * 
+ * @param 
+ * @param 
+ * @return 
+ */
+void Tree::debugPrintAllNodes(int nseqs)
+{
+  clustalw::TreeNode *p;
+  int i;
+   float diff, maxDist;
+    cerr << "\nDEBUG: reportAllNodes\n";
+    for (i = 0; i < ntotal; i++) {
+        p = ptrs[i];
+        //        ios::sync_with_stdio();
+
+        // same design as TreeNode
+        if (p->parent == NULL)
+            diff = calcRootMean(p, &maxDist);
+        else
+            diff = calcMean(p, &maxDist, nseqs);
+        fprintf(stdout,
+                "i=%d p=%p: parent=%p left=%p right=%p dist=%f diff=%f\n",
+                i, (void*)p, (void*)p->parent, (void*)p->left, (void*)p->right,
+               p->dist, diff);
+    }
+}
+
+
+
+
+
+/**
+ * 
+ * @param ptree 
+ * @param nseqs 
+ * @return 
+ */
+clustalw::TreeNode* Tree::reRoot(clustalw::TreeNode* ptree, int nseqs)
+{
+    clustalw::TreeNode *p, *rootNode, *rootPtr;
+    float diff, minDiff = 0.0, minDepth = 1.0, maxDist;
+    int i;
+    bool first = true;
+
+    // find the difference between the means of leaf->node
+    // distances on the left and on the right of each node
+    rootPtr = ptree;
+    for (i = 0; i < ntotal; i++)
+    {
+        p = ptrs[i];
+        if (p->parent == NULL)
+        {
+            /* AW Bug 94: p->parent must be chosen as rootNode
+               (non-optimized executables (-O0) never do), otherwise
+               insertRoot fails.
+               Is p->parent == NULL valid at all?
+               Simplest thing for now is to continue here. Tree code
+               needs serious dismantling anyway. See debugPrintAllNodes
+            */
+
+            continue;
+            //diff = calcRootMean(p, &maxDist);
+        }
+        else
+        {
+            diff = calcMean(p, &maxDist, nseqs);
+        }
+
+        if ((diff == 0) || ((diff > 0) && (diff < 2 *p->dist)))
+        {
+            if ((maxDist < minDepth) || (first == true))
+            {
+                first = false;
+                rootPtr = p;
+                minDepth = maxDist;
+                minDiff = diff;
+            }
+        }
+
+    }
+
+    
+    // insert a new node as the ancestor of the node which produces the shallowest
+    // tree.
+    /* AW Bug 94: could also be prevented here */
+    if (rootPtr == ptree)
+    {
+        minDiff = rootPtr->left->dist + rootPtr->right->dist;
+        rootPtr = rootPtr->right;
+    }
+    rootNode = insertRoot(rootPtr, minDiff);
+
+    diff = calcRootMean(rootNode, &maxDist);
+
+    return rootNode;
+}
+
+/**
+ * 
+ * @param p 
+ * @param diff 
+ * @return 
+ */
+clustalw::TreeNode* Tree::insertRoot(clustalw::TreeNode* p, float diff)
+{
+    clustalw::TreeNode *newp, *prev, *q, *t;
+    float dist, prevDist, td;
+
+    newp = avail();
+
+    if (p->parent==NULL) {
+        // AW bug 94: question remains if access here should be handled differently
+        cerr << "\n\n*** INTERNAL ERROR: Tree::insertRoot: TreeNode p->parent is NULL\n";
+        cerr << "To help us fix this bug, please send sequence file and used options to clustalw@ucd.ie\n";
+        exit(1);
+    }
+
+    t = p->parent;
+    prevDist = t->dist;
+
+    p->parent = newp;
+
+    dist = p->dist;
+
+    p->dist = diff / 2;
+
+    if (p->dist < 0.0)
+    {
+        p->dist = 0.0;
+    }
+
+    if (p->dist > dist)
+    {
+        p->dist = dist;
+    }
+
+    t->dist = dist - p->dist;
+
+    newp->left = t;
+    newp->right = p;
+    newp->parent = NULL;
+    newp->dist = 0.0;
+    newp->leaf = NODE;
+
+    if (t->left == p)
+    {
+        t->left = t->parent;
+    }
+    else
+    {
+        t->right = t->parent;
+    }
+
+    prev = t;
+    q = t->parent;
+
+    t->parent = newp;
+
+    while (q != NULL)
+    {
+        if (q->left == prev)
+        {
+            q->left = q->parent;
+            q->parent = prev;
+            td = q->dist;
+            q->dist = prevDist;
+            prevDist = td;
+            prev = q;
+            q = q->left;
+        }
+        else
+        {
+            q->right = q->parent;
+            q->parent = prev;
+            td = q->dist;
+            q->dist = prevDist;
+            prevDist = td;
+            prev = q;
+            q = q->right;
+        }
+    }
+
+    /*
+     * remove the old root node
+     */
+    q = prev;
+    if (q->left == NULL)
+    {
+        dist = q->dist;
+        q = q->right;
+        q->dist += dist;
+        q->parent = prev->parent;
+
+        if (prev->parent->left == prev)
+        {
+            prev->parent->left = q;
+        }
+        else
+        {
+            prev->parent->right = q;
+        }
+
+        prev->right = NULL;
+    }
+    else
+    {
+        dist = q->dist;
+        q = q->left;
+        q->dist += dist;
+        q->parent = prev->parent;
+
+        if (prev->parent->left == prev)
+        {
+            prev->parent->left = q;
+        }
+        else
+        {
+            prev->parent->right = q;
+        }
+
+        prev->left = NULL;
+    }
+
+    return (newp);
+}
+
+/**
+ * 
+ * @param root 
+ * @param maxDist 
+ * @return 
+ */
+float Tree::calcRootMean(clustalw::TreeNode* root, float *maxDist)
+{
+    float dist, leftSum = 0.0, rightSum = 0.0, leftMean, rightMean, diff;
+    clustalw::TreeNode* p;
+    int i;
+    int numLeft, numRight;
+    int direction;
+    
+    // for each leaf, determine whether the leaf is left or right of the root.
+
+    dist = (*maxDist) = 0;
+    numLeft = numRight = 0;
+    for (i = 0; i < numSeq; i++)
+    {
+        p = lptr[i];
+        dist = 0.0;
+        while (p->parent != root)
+        {
+            dist += p->dist;
+            p = p->parent;
+        }
+
+        if (p == root->left)
+        {
+            direction = LEFT;
+        }
+        else
+        {
+            direction = RIGHT;
+        }
+
+        dist += p->dist;
+
+        if (direction == LEFT)
+        {
+            leftSum += dist;
+            numLeft++;
+        }
+        else
+        {
+            rightSum += dist;
+            numRight++;
+        }
+        if (dist > (*maxDist))
+        {
+            *maxDist = dist;
+        }
+    }
+
+    leftMean = leftSum / numLeft;
+    rightMean = rightSum / numRight;
+
+    diff = leftMean - rightMean;
+    return diff;
+}
+
+/**
+ * 
+ * @param nptr 
+ * @param maxDist 
+ * @param nSeqs 
+ * @return 
+ */
+float Tree::calcMean(clustalw::TreeNode* nptr, float *maxDist, int nSeqs)
+{
+    float dist, leftSum = 0.0, rightSum = 0.0, leftMean, rightMean, diff;
+    clustalw::TreeNode* p;  
+    clustalw::TreeNode** pathToRoot;
+    float *distToNode;
+    int depth = 0, i, j, n = 0;
+    int numLeft, numRight;
+    int direction;
+    bool found;
+
+    pathToRoot = new clustalw::TreeNode*[nSeqs];
+    distToNode = new float[nSeqs];
+    // determine all nodes between the selected node and the root;
+  
+    depth = 0;
+    (*maxDist) = dist = 0.0;
+    numLeft = numRight = 0;
+    p = nptr;
+    while (p != NULL)
+    {
+        pathToRoot[depth] = p;
+        dist += p->dist;
+        distToNode[depth] = dist;
+        p = p->parent;
+        depth++;
+    }
+
+    // for each leaf, determine whether the leaf is left or right of the node.
+    // (RIGHT = descendant, LEFT = not descendant)
+    for (i = 0; i < numSeq; i++)
+    {
+        p = lptr[i];
+        if (p == nptr)
+        {
+            direction = RIGHT;
+            dist = 0.0;
+        }
+        else
+        {
+            direction = LEFT;
+            dist = 0.0;
+            
+            // find the common ancestor. 
+            
+            found = false;
+            n = 0;
+            while ((found == false) && (p->parent != NULL))
+            {
+                for (j = 0; j < depth; j++)
+                if (p->parent == pathToRoot[j])
+                {
+                    found = true;
+                    n = j;
+                }
+
+                dist += p->dist;
+                p = p->parent;
+            }
+            if (p == nptr)
+            {
+                direction = RIGHT;
+            }
+        }
+
+        if (direction == LEFT)
+        {
+            leftSum += dist;
+            leftSum += distToNode[n - 1];
+            numLeft++;
+        }
+        else
+        {
+            rightSum += dist;
+            numRight++;
+        }
+
+        if (dist > (*maxDist))
+        {
+            *maxDist = dist;
+        }
+    }
+
+    delete [] distToNode;
+    distToNode = NULL;
+    delete [] pathToRoot;
+    pathToRoot = NULL;
+    
+    leftMean = leftSum / numLeft;
+    rightMean = rightSum / numRight;
+
+    diff = leftMean - rightMean;
+    return (diff);
+}
+
+/**
+ * 
+ */
+void Tree::orderNodes()
+{
+    int i;
+    clustalw::TreeNode* p;
+
+    for (i = 0; i < numSeq; i++)
+    {
+        p = lptr[i];
+        while (p != NULL)
+        {
+            p->order++;
+            p = p->parent;
+        }
+    }
+}
+
+/**
+ * 
+ * @param leaf 
+ * @return 
+ */
+int Tree::calcWeight(int leaf)
+{
+    clustalw::TreeNode* p;
+    float weight = 0.0;
+
+    p = olptr[leaf];
+    while (p->parent != NULL)
+    {
+        weight += p->dist / p->order;
+        p = p->parent;
+    }
+
+    weight *= 100.0;
+
+    return ((int)weight);
+}
+
+/**
+ * skipSpace is used to skip all the spaces at the begining of a file. The next read
+ * will give a character other than a space.
+ * @param file 
+ */
+void Tree::skipSpace(ifstream* file)
+{
+    char c;
+
+    do
+    {
+        c = file->get();
+    }
+    while (isspace(c));
+
+    file->putback(c);
+}
+
+/**
+ * 
+ * @param p 
+ * @param nextGroups 
+ * @param nSeqs 
+ * @param stepsPtr 
+ */
+void Tree::groupSeqs(clustalw::TreeNode* p, int *nextGroups, int nSeqs, AlignmentSteps* stepsPtr)
+{
+    int i;
+    int *tmpGroups;
+
+    tmpGroups = new int[nSeqs + 1];
+    for (i = 0; i < nSeqs; i++)
+    {
+        tmpGroups[i] = 0;
+    }
+
+    if (p->left != NULL)
+    {
+        if (p->left->leaf == NODE)
+        {
+            groupSeqs(p->left, nextGroups, nSeqs, stepsPtr);
+
+            for (i = 0; i < nSeqs; i++)
+                if (nextGroups[i] != 0)
+                {
+                    tmpGroups[i] = 1;
+                }
+        }
+        else
+        {
+            markGroup1(p->left, tmpGroups, nSeqs);
+        }
+
+    }
+
+    if (p->right != NULL)
+    {
+        if (p->right->leaf == NODE)
+        {
+            groupSeqs(p->right, nextGroups, nSeqs, stepsPtr);
+            for (i = 0; i < nSeqs; i++)
+                if (nextGroups[i] != 0)
+                {
+                    tmpGroups[i] = 2;
+                }
+        }
+        else
+        {
+            markGroup2(p->right, tmpGroups, nSeqs);
+        }
+        stepsPtr->saveSet(nSeqs, tmpGroups);
+    }
+
+    for (i = 0; i < nSeqs; i++)
+    {
+        nextGroups[i] = tmpGroups[i];
+    }
+
+    delete [] tmpGroups;
+    tmpGroups  = NULL;
+}
+
+/**
+ * 
+ * @param p 
+ * @param groups 
+ * @param n 
+ */
+void Tree::markGroup1(clustalw::TreeNode* p, int *groups, int n)
+{
+    int i;
+
+    for (i = 0; i < n; i++)
+    {
+        if (olptr[i] == p)
+        {
+            groups[i] = 1;
+        }
+        else
+        {
+            groups[i] = 0;
+        }
+    }
+}
+
+/**
+ * 
+ * @param p 
+ * @param groups 
+ * @param n 
+ */
+void Tree::markGroup2(clustalw::TreeNode* p, int *groups, int n)
+{
+    int i;
+
+    for (i = 0; i < n; i++)
+    {
+        if (olptr[i] == p)
+        {
+            groups[i] = 2;
+        }
+        else if (groups[i] != 0)
+        {
+            groups[i] = 1;
+        }
+    }
+}
+
+/**
+ * 
+ * @return 
+ */
+clustalw::TreeNode* Tree::avail()
+{
+    clustalw::TreeNode* p;
+    p = new TreeNode; 
+    p->left = NULL;
+    p->right = NULL;
+    p->parent = NULL;
+    p->dist = 0.0;
+    p->leaf = 0;
+    p->order = 0;
+    return (p);
+}
+
+/**
+ * 
+ * @param p 
+ * @param parent 
+ * @param pleaf 
+ * @param pname 
+ * @param pdist 
+ */
+void Tree::setInfo(TreeNode* p, TreeNode* parent, int pleaf, string pname, float
+                     pdist)
+{
+    p->parent = parent;
+    p->leaf = pleaf;
+    p->dist = pdist;
+    p->order = 0;
+    p->name = pname;
+    if (p->leaf == true)
+    {
+        p->left = NULL;
+        p->right = NULL;
+    }
+}
+          
+}