Mac binaries
[jabaws.git] / website / archive / binaries / mac / src / clustalw / src / tree / TreeInterface.cpp
diff --git a/website/archive/binaries/mac/src/clustalw/src/tree/TreeInterface.cpp b/website/archive/binaries/mac/src/clustalw/src/tree/TreeInterface.cpp
new file mode 100644 (file)
index 0000000..d883a95
--- /dev/null
@@ -0,0 +1,671 @@
+/**
+ * Author: Mark Larkin
+ * 
+ * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson.  
+ */
+/**
+ * Changes:
+ * Mark 10-5-2007: Bug fix # 42. Added getWeightsForQtLowScore function.
+ * Mark 22-5-2007: Made a change to getWeightsForQtLowScore
+ * Mark 23-5-2007: Made a change to getWeightsForQtLowScore
+ */
+#ifdef HAVE_CONFIG_H
+    #include "config.h"
+#endif
+#include "TreeInterface.h"
+#include <sstream>
+#include "UnRootedClusterTree.h"
+#include "Tree.h"
+#include "../multipleAlign/MSA.h"
+#include "../general/userparams.h"
+#include "../general/debuglogObject.h"
+#include "UPGMA/RootedGuideTree.h"
+#include "UPGMA/RootedClusterTree.h"
+namespace clustalw
+{
+
+/**
+ * This will be called by align!
+ *
+ */
+auto_ptr<AlignmentSteps>
+TreeInterface::getWeightsAndStepsFromDistMat(vector<int>* seqWeights, DistMatrix* distMat, 
+                                             Alignment *alignPtr, int seq1, int nSeqs, 
+                                             string* phylipName, bool* success)
+{
+    #if DEBUGFULL 
+        if(logObject && DEBUGLOG)
+        {
+            logObject->logMsg("In getWeightsAndStepsFromDistMat\n");
+        }
+    #endif    
+    if(userParameters->getClusterAlgorithm() == UPGMA)
+    {
+        return getWeightsAndStepsFromDistMatUPGMA(seqWeights, distMat, alignPtr, 
+                                                seq1, nSeqs, phylipName, success);
+    }
+    else
+    {
+        return getWeightsAndStepsFromDistMatNJ(seqWeights, distMat, alignPtr, 
+                                                seq1, nSeqs, phylipName, success);
+    }
+}
+
+                                   
+/**
+ * This will be called by sequencesAlignToProfile. This function will put the distMat into
+ * a similarity matrix.
+ */
+void
+TreeInterface::getWeightsFromDistMat(vector<int>* seqWeights, DistMatrix* distMat, 
+                                     Alignment *alignPtr, int seq1, int nSeqs, 
+                                     string* phylipName, bool* success)
+{
+    if(userParameters->getClusterAlgorithm() == UPGMA)
+    {    
+        getWeightsFromDistMatUPGMA(seqWeights, distMat, alignPtr, seq1, nSeqs, phylipName,
+                                   success);
+    }
+    else
+    {
+        getWeightsFromDistMatNJ(seqWeights, distMat, alignPtr, seq1, nSeqs, phylipName,
+                                success);    
+    }
+}
+
+/**
+ * This function will be called by profileAlign
+ */
+void
+TreeInterface::getWeightsForProfileAlign(Alignment* alignPtr, DistMatrix* distMat, 
+                                         string* p1TreeName, vector<int>* p1Weights, string* p2TreeName, 
+                                         vector<int>* p2Weights, int numSeqs, int profile1NumSeqs, bool useTree1, 
+                                         bool useTree2, bool* success)
+{    
+    if(userParameters->getClusterAlgorithm() == UPGMA)
+    {    
+        getWeightsForProfileAlignUPGMA(alignPtr, distMat, p1TreeName, p1Weights, p2TreeName,
+                                p2Weights, numSeqs, profile1NumSeqs, useTree1, useTree2,
+                                success);
+    }
+    else
+    {
+        getWeightsForProfileAlignNJ(alignPtr, distMat, p1TreeName, p1Weights, p2TreeName,
+                                p2Weights, numSeqs, profile1NumSeqs, useTree1, useTree2,
+                                success);    
+    }                               
+}                    
+
+/**
+ * This function will be called by doAlignUseOldGuideTree
+ *
+ */                                   
+auto_ptr<AlignmentSteps>
+TreeInterface::getWeightsAndStepsFromTree(Alignment* alignPtr, 
+                                          DistMatrix* distMat, string* treeName,
+                                          vector<int>* seqWeights, int fSeq, 
+                                          int numSeqs, bool* success)         
+{
+    /**
+     * This will only use the NJ. It will not use UPGMA
+     */
+    return getWeightsAndStepsFromTreeNJ(alignPtr, distMat, treeName, seqWeights, 
+                                        fSeq, numSeqs, success);
+}
+
+/**
+ * Called by sequencesAlignToProfile
+ */
+int
+TreeInterface::getWeightsFromGuideTree(Alignment* alignPtr, DistMatrix* distMat,
+                                       string* treeName, vector<int>* seqWeights, int fSeq,
+                                       int nSeqs, bool* success)
+{
+    /**
+     * This will only use the NJ. It will not use UPGMA
+     */    
+    return getWeightsFromGuideTreeNJ(alignPtr, distMat, treeName, seqWeights, fSeq, nSeqs, 
+                                      success);
+}
+/**
+ * This will be called by doGuideTreeOnly. This does not put the distMat into similarity.
+ *
+ */                                     
+void
+TreeInterface::generateTreeFromDistMat(DistMatrix* distMat, Alignment *alignPtr, 
+                                       int seq1, int nSeqs, 
+                                       string* phylipName, bool* success)
+{
+    /**
+     * This function does not put distMat into similarities
+     */
+    if(userParameters->getClusterAlgorithm() == UPGMA)
+    { 
+        RootedGuideTree guideTree;
+        generateTreeFromDistMatUPGMA(&guideTree, distMat, alignPtr, seq1, nSeqs, 
+                                     phylipName, success);
+    }
+    else
+    {                                            
+        generateTreeFromDistMatNJ(distMat, alignPtr, seq1, nSeqs, phylipName, success);
+    }
+}
+
+void
+TreeInterface::treeFromAlignment(TreeNames* treeNames, Alignment *alignPtr)
+{
+    if(userParameters->getClusterAlgorithm() == UPGMA)
+    {
+        RootedClusterTree clusterTree;
+        clusterTree.treeFromAlignment(treeNames, alignPtr);
+    }
+    else
+    {
+        UnRootedClusterTree clusterTree;
+        clusterTree.treeFromAlignment(treeNames, alignPtr);
+    }
+}                   
+void
+TreeInterface::bootstrapTree(TreeNames* treeNames, Alignment *alignPtr)
+{
+    /**
+     * NOTE We only do bootstrapping on the NJ tree.
+     */
+    UnRootedClusterTree clusterTree;
+    clusterTree.bootstrapTree(treeNames, alignPtr);
+}
+
+
+/**
+ * Private functions!!!!
+ *
+ */
+
+/** *******************
+ *      
+ *      Neighbour joining functions
+ */ 
+/**
+ * Note: After this function has been called the distance matrix will all be in similarities.
+ *
+ */   
+auto_ptr<AlignmentSteps>
+TreeInterface::getWeightsAndStepsFromDistMatNJ(vector<int>* seqWeights, DistMatrix* distMat, 
+                                               Alignment *alignPtr, int seq1, int nSeqs, 
+                                               string* phylipName, bool* success)
+{   
+    auto_ptr<AlignmentSteps> progSteps;   
+    generateTreeFromDistMatNJ(distMat, alignPtr, seq1, nSeqs, phylipName, success);
+        
+    progSteps = getWeightsAndStepsUseOldGuideTreeNJ(distMat, alignPtr, phylipName,
+                                                    seqWeights, seq1, nSeqs, success);
+    return progSteps;
+}
+
+auto_ptr<AlignmentSteps>
+TreeInterface::getWeightsAndStepsUseOldGuideTreeNJ(DistMatrix* distMat, Alignment *alignPtr,
+                                                   string* treeName, vector<int>* seqWeights, 
+                                                   int fSeq, int nSeqs, bool* success)
+{   
+    Tree groupTree; 
+    auto_ptr<AlignmentSteps> progSteps;
+    
+    if(nSeqs == 1)
+    {
+        utilityObject->info("Only 1 sequence, cannot do multiple alignment\n");
+        *success = false;
+        return progSteps;
+    }
+    
+    int status = 0;
+    
+    status = readTreeAndCalcWeightsNJ(&groupTree, alignPtr, distMat, treeName,
+                                          seqWeights, fSeq, nSeqs);
+                                     
+    if(status == 0)
+    {
+        *success = false;
+        return progSteps;
+    }
+    
+    progSteps = groupTree.createSets(0, nSeqs);
+    int _numSteps = progSteps->getNumSteps();
+    
+    utilityObject->info("There are %d groups", _numSteps);        
+    // clear the memory used for the phylogenetic tree
+
+    if (nSeqs >= 2)
+    {
+        groupTree.clearTree(NULL);
+    }
+                                          
+    *success = true;
+    return progSteps;   
+}
+                                                                                              
+
+                                                                                
+/**
+ * The function readTreeAndCalcWeightsNJ is used to read in the tree given the treeName.
+ * It then calls the appropriate functions to calc the seqWeights and make sure the matrix 
+ * is in similarity mode. 
+ */                                                                                
+int
+TreeInterface::readTreeAndCalcWeightsNJ(Tree* groupTree, Alignment* alignPtr, 
+                                        DistMatrix* distMat, string* treeName,
+                                        vector<int>* seqWeights, int fSeq, int nSeqs)
+{
+    int status = 0;
+    if (nSeqs >= 2)
+    {
+        status = groupTree->readTree(alignPtr, treeName->c_str(), fSeq - 1, nSeqs);
+        if (status == 0)
+        {
+            return status;
+        }
+    }
+
+    groupTree->calcSeqWeights(fSeq - 1, nSeqs, seqWeights);
+    
+    status = groupTree->calcSimilarities(alignPtr, distMat);
+    
+    return status;                                                            
+}
+
+int
+TreeInterface::getWeightsFromGuideTreeNJ(Alignment* alignPtr, DistMatrix* distMat,
+                                         string* treeName, vector<int>* seqWeights, int fSeq, int nSeqs, 
+                                         bool* success)
+{
+    Tree groupTree;
+    int status = readTreeAndCalcWeightsNJ(&groupTree, alignPtr, distMat, treeName,
+                                          seqWeights, fSeq, nSeqs);
+    if(status == 0)
+    {
+        *success = false;
+    }
+    else
+    {
+        *success = true;
+    }
+    return status;
+}                                                                                             
+
+void
+TreeInterface::getWeightsFromDistMatNJ(vector<int>* seqWeights, DistMatrix* distMat, 
+                                       Alignment *alignPtr, int seq1, int _nSeqs, 
+                                       string* phylipName, bool* success)
+{
+    string copyOfPhylipName = string(*phylipName);     
+    int nSeqs = _nSeqs;
+    int status = 0;
+    
+    generateTreeFromDistMatNJ(distMat, alignPtr, seq1, nSeqs, phylipName, success);
+    
+    Tree groupTree;
+    status = readTreeAndCalcWeightsNJ(&groupTree, alignPtr, distMat, phylipName,
+                                          seqWeights, seq1, nSeqs);
+    if(status == 0)
+    {
+        *success = false;
+    }
+    else
+    {
+        *success = true;
+    }
+}
+
+void
+TreeInterface::getWeightsForProfileAlignNJ(Alignment* alignPtr, DistMatrix* distMat, 
+                                           string* p1TreeName, vector<int>* p1Weights, string* p2TreeName, 
+                                           vector<int>* p2Weights, int numSeqs, int profile1NumSeqs, bool useTree1, 
+                                           bool useTree2, bool* success)
+{
+    if(!useTree1)
+    {
+        if (profile1NumSeqs >= 2) 
+        {        
+            generateTreeFromDistMatNJ(distMat, alignPtr, 1, profile1NumSeqs, p1TreeName,
+                                        success);
+        }    
+    }
+    
+    if(!useTree2)
+    {
+        if(numSeqs - profile1NumSeqs >= 2) 
+        {        
+            generateTreeFromDistMatNJ(distMat, alignPtr, profile1NumSeqs + 1, 
+                                      numSeqs - profile1NumSeqs, p2TreeName, success);
+        }  
+    }
+    
+    if (userParameters->getNewTree1File() || userParameters->getNewTree2File()) 
+    {
+        *success = false;
+        return;
+    }    
+    // MSA->CALCPAIRWISE
+    
+    MSA* msaObj = new MSA();
+        
+    int count = msaObj->calcPairwiseForProfileAlign(alignPtr, distMat);
+    
+    if (count == 0) 
+    {
+        *success = false;
+        return;
+    }
+    
+    Tree groupTree1, groupTree2;
+    int status = 0;
+        
+    if (profile1NumSeqs >= 2)
+    {
+        status = groupTree1.readTree(alignPtr, p1TreeName->c_str(), 0, profile1NumSeqs);
+        if (status == 0)
+        {
+            *success = false;
+            return;
+        }
+    }
+    
+    groupTree1.calcSeqWeights(0, profile1NumSeqs, p1Weights);
+    
+    if (profile1NumSeqs >= 2)
+    {
+        groupTree1.clearTree(NULL);
+    }
+    
+    if (numSeqs - profile1NumSeqs >= 2)
+    {
+        status = groupTree2.readTree(alignPtr, p2TreeName->c_str(), profile1NumSeqs, numSeqs);
+        if (status == 0)
+        {
+            *success = false;
+            return;
+        }
+    }
+    
+    groupTree2.calcSeqWeights(profile1NumSeqs, numSeqs, p2Weights);
+
+
+    /* clear the memory for the phylogenetic tree */
+
+    if (numSeqs - profile1NumSeqs >= 2)
+    {
+        groupTree2.clearTree(NULL);
+    }
+    
+    /**
+     * Convert distances to similarities!!!!!!
+     */
+    for (int i = 1; i < numSeqs; i++)
+    {
+        for (int j = i + 1; j <= numSeqs; j++)
+        {
+            (*distMat)(i, j) = 100.0 - (*distMat)(i, j) * 100.0;
+            (*distMat)(j, i) = (*distMat)(i, j);
+        }
+    }    
+      
+    *success = true;
+            
+}
+
+void
+TreeInterface::generateTreeFromDistMatNJ(DistMatrix* distMat, Alignment *alignPtr, 
+                                         int seq1, int nSeqs, 
+                                         string* phylipName, bool* success)
+{
+    string copyOfPhylipName = string(*phylipName);     
+    
+    if (nSeqs >= 2) 
+    {
+        UnRootedClusterTree* clusterTree = new UnRootedClusterTree;   
+        clusterTree->treeFromDistMatrix(distMat, alignPtr, seq1, nSeqs, copyOfPhylipName);
+                                        
+        *phylipName = copyOfPhylipName;
+        // AW: message outputted by OutputFile function
+        // utilityObject->info("Guide tree        file created:   [%s]",
+        //                              phylipName->c_str());        
+        delete clusterTree;
+    }
+    *success = true;
+}
+
+auto_ptr<AlignmentSteps>
+TreeInterface::getWeightsAndStepsFromTreeNJ(Alignment* alignPtr, 
+                                            DistMatrix* distMat, string* treeName,
+                                            vector<int>* seqWeights, int fSeq, int numSeqs, 
+                                            bool* success)
+{
+    auto_ptr<AlignmentSteps> progSteps;
+    Tree groupTree; 
+    if(numSeqs == 1)
+    {
+        utilityObject->info("Only 1 sequence, cannot do multiple alignment\n");
+        *success = false;
+        return progSteps;
+    }
+    int status;
+    status = readTreeAndCalcWeightsNJ(&groupTree, alignPtr, distMat, treeName, 
+                                      seqWeights, fSeq, numSeqs);
+    
+    if (status == 0)
+    {
+        *success = false;
+        return progSteps;
+    }
+    
+    progSteps = groupTree.createSets(0, numSeqs);
+    int _numSteps = progSteps->getNumSteps();
+    utilityObject->info("There are %d groups", _numSteps);        
+    // clear the memory used for the phylogenetic tree
+
+    if (numSeqs >= 2)
+    {
+        groupTree.clearTree(NULL);
+    }
+    
+    *success = true;            
+    
+    return progSteps;
+
+}
+
+/**
+ * UPGMA functions
+ *
+ */
+auto_ptr<AlignmentSteps>
+TreeInterface::getWeightsAndStepsFromDistMatUPGMA(vector<int>* seqWeights, 
+                                                  DistMatrix* distMat, Alignment *alignPtr, 
+                                                  int seq1, int nSeqs, string* phylipName, bool* success)
+{
+    auto_ptr<AlignmentSteps> progSteps;
+    RootedGuideTree guideTree;   
+
+    progSteps = generateTreeFromDistMatUPGMA(&guideTree, distMat, alignPtr, seq1, nSeqs,
+                                             phylipName, success);
+
+    guideTree.calcSeqWeights(0, nSeqs, seqWeights);
+    
+    distMat->makeSimilarityMatrix();
+                                                    
+    return progSteps;
+}                                                                                             
+
+auto_ptr<AlignmentSteps> TreeInterface::generateTreeFromDistMatUPGMA(RootedGuideTree* guideTree, DistMatrix* distMat, Alignment *alignPtr, int seq1, int nSeqs, 
+                                            string* phylipName, bool* success)
+{
+    auto_ptr<AlignmentSteps> progSteps;
+    string copyOfPhylipName = string(*phylipName);     
+    
+    if (nSeqs >= 2) 
+    {
+        RootedClusterTree clusterTree;   
+        progSteps = clusterTree.treeFromDistMatrix(guideTree, distMat, alignPtr, seq1,
+                                                   nSeqs, copyOfPhylipName);
+                                        
+        *phylipName = copyOfPhylipName;
+        // AW: message outputted by OutputFile function
+        // utilityObject->info("Guide tree        file created:   [%s]",
+        //                              phylipName->c_str());        
+    }
+    *success = true;
+    return progSteps;
+}
+                                  
+void TreeInterface::getWeightsFromDistMatUPGMA(vector<int>* seqWeights, DistMatrix* distMat, 
+                                   Alignment *alignPtr, int seq1, int nSeqs, 
+                                   string* phylipName, bool* success)
+{
+    getWeightsAndStepsFromDistMatUPGMA(seqWeights, distMat, alignPtr, 
+                                 seq1, nSeqs, phylipName, success);
+}
+
+/**
+ * The function getWeightsForProfileAlignUPGMA is used to generate the sequence weights
+ * that will be used in a profile alignment. It also recalculates the distance matrix, and 
+ * then returns it as a similarity matrix. This function uses the NJ code to read in a tree
+ * from a file if we are using a previous tree. This is because that part of the code is able
+ * to do the finding of the root etc.
+ */
+void TreeInterface::getWeightsForProfileAlignUPGMA(Alignment* alignPtr, DistMatrix* distMat, 
+                             string* p1TreeName, vector<int>* p1Weights, string* p2TreeName, 
+                    vector<int>* p2Weights, int numSeqs, int profile1NumSeqs, bool useTree1, 
+                    bool useTree2, bool* success)
+{
+    int status = 0;
+    
+    if(useTree1)
+    {
+        // Use the code to read in the tree and get the seqWeights
+        Tree groupTree1;
+        if (profile1NumSeqs >= 2)
+        {
+            status = groupTree1.readTree(alignPtr, p1TreeName->c_str(), 0, profile1NumSeqs);
+            if (status == 0)
+            {
+                *success = false;
+                return;
+            }
+        }
+        groupTree1.calcSeqWeights(0, profile1NumSeqs, p1Weights);
+    
+        if (profile1NumSeqs >= 2)
+        {
+            groupTree1.clearTree(NULL);
+        }                
+    }
+    else
+    {
+        if (profile1NumSeqs >= 2) 
+        {        
+            RootedGuideTree guideTree;
+            generateTreeFromDistMatUPGMA(&guideTree, distMat, alignPtr, 1, profile1NumSeqs, 
+                                         p1TreeName, success);
+            guideTree.calcSeqWeights(0, profile1NumSeqs, p1Weights);
+        }                                      
+    }
+    
+    if(useTree2)
+    {
+        Tree groupTree2;
+        if (numSeqs - profile1NumSeqs >= 2)
+        {
+            status = groupTree2.readTree(alignPtr, p2TreeName->c_str(), 
+                                         profile1NumSeqs, numSeqs);
+            if (status == 0)
+            {
+                *success = false;
+                return;
+            }
+        }
+        groupTree2.calcSeqWeights(profile1NumSeqs, numSeqs, p2Weights);
+    
+        if (numSeqs - profile1NumSeqs >= 2)
+        {
+            groupTree2.clearTree(NULL);
+        }    
+    }
+    else
+    {
+        if(numSeqs - profile1NumSeqs >= 2) 
+        {        
+            RootedGuideTree guideTree;
+            generateTreeFromDistMatUPGMA(&guideTree, distMat, alignPtr, profile1NumSeqs + 1,
+                                         numSeqs - profile1NumSeqs, p2TreeName, success);
+            guideTree.calcSeqWeights(profile1NumSeqs, numSeqs, p2Weights);
+        }    
+    }
+    
+    if (userParameters->getNewTree1File() || userParameters->getNewTree2File()) 
+    {
+        *success = false;
+        return;
+    }    
+    
+    MSA* msaObj = new MSA();
+        
+    int count = msaObj->calcPairwiseForProfileAlign(alignPtr, distMat);
+    
+    delete msaObj;
+    
+    if (count == 0) 
+    {
+        *success = false;
+        return;
+    }
+    
+    /**
+     * Convert distances to similarities!!!!!!
+     */   
+    distMat->makeSimilarityMatrix();
+      
+    *success = true;
+
+}
+
+/**
+ * Mark 10-5-2007: Bug fix # 42
+ * The following function is to be used only for calculating the weights for the Qt
+ * low scoring segments.
+ */
+void TreeInterface::getWeightsForQtLowScore(vector<int>* seqWeights, DistMatrix* distMat, 
+                                   Alignment *alignPtr, int seq1, int nSeqs, 
+                                   string* phylipName, bool* success)
+{
+    string copyOfPhylipName = string(*phylipName);     
+    int _nSeqs = nSeqs;
+    int status = 0;
+    
+    generateTreeFromDistMatNJ(distMat, alignPtr, seq1, _nSeqs, phylipName, success);
+    
+    Tree groupTree;
+
+    status = 0;
+    if (_nSeqs >= 2)
+    {
+        status = groupTree.readTree(alignPtr, phylipName->c_str(), seq1 - 1,
+                                    seq1 + _nSeqs-1); // mark 22-5-07
+        if(status == 0)
+        {
+            *success = false;
+            return;
+        }
+        else
+        {
+            *success = true;
+        }
+    }
+
+    groupTree.calcSeqWeights(seq1 - 1, seq1 + _nSeqs - 1, seqWeights); // mark 23-5-07
+}
+                                                           
+}