--- /dev/null
+/**
+ * Author: Mark Larkin
+ *
+ * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson.
+ */
+/**
+ * Changes:
+ * Mark 10-5-2007: Bug fix # 42. Added getWeightsForQtLowScore function.
+ * Mark 22-5-2007: Made a change to getWeightsForQtLowScore
+ * Mark 23-5-2007: Made a change to getWeightsForQtLowScore
+ */
+#ifdef HAVE_CONFIG_H
+ #include "config.h"
+#endif
+#include "TreeInterface.h"
+#include <sstream>
+#include "UnRootedClusterTree.h"
+#include "Tree.h"
+#include "../multipleAlign/MSA.h"
+#include "../general/userparams.h"
+#include "../general/debuglogObject.h"
+#include "UPGMA/RootedGuideTree.h"
+#include "UPGMA/RootedClusterTree.h"
+namespace clustalw
+{
+
+/**
+ * This will be called by align!
+ *
+ */
+auto_ptr<AlignmentSteps>
+TreeInterface::getWeightsAndStepsFromDistMat(vector<int>* seqWeights, DistMatrix* distMat,
+ Alignment *alignPtr, int seq1, int nSeqs,
+ string* phylipName, bool* success)
+{
+ #if DEBUGFULL
+ if(logObject && DEBUGLOG)
+ {
+ logObject->logMsg("In getWeightsAndStepsFromDistMat\n");
+ }
+ #endif
+ if(userParameters->getClusterAlgorithm() == UPGMA)
+ {
+ return getWeightsAndStepsFromDistMatUPGMA(seqWeights, distMat, alignPtr,
+ seq1, nSeqs, phylipName, success);
+ }
+ else
+ {
+ return getWeightsAndStepsFromDistMatNJ(seqWeights, distMat, alignPtr,
+ seq1, nSeqs, phylipName, success);
+ }
+}
+
+
+/**
+ * This will be called by sequencesAlignToProfile. This function will put the distMat into
+ * a similarity matrix.
+ */
+void
+TreeInterface::getWeightsFromDistMat(vector<int>* seqWeights, DistMatrix* distMat,
+ Alignment *alignPtr, int seq1, int nSeqs,
+ string* phylipName, bool* success)
+{
+ if(userParameters->getClusterAlgorithm() == UPGMA)
+ {
+ getWeightsFromDistMatUPGMA(seqWeights, distMat, alignPtr, seq1, nSeqs, phylipName,
+ success);
+ }
+ else
+ {
+ getWeightsFromDistMatNJ(seqWeights, distMat, alignPtr, seq1, nSeqs, phylipName,
+ success);
+ }
+}
+
+/**
+ * This function will be called by profileAlign
+ */
+void
+TreeInterface::getWeightsForProfileAlign(Alignment* alignPtr, DistMatrix* distMat,
+ string* p1TreeName, vector<int>* p1Weights, string* p2TreeName,
+ vector<int>* p2Weights, int numSeqs, int profile1NumSeqs, bool useTree1,
+ bool useTree2, bool* success)
+{
+ if(userParameters->getClusterAlgorithm() == UPGMA)
+ {
+ getWeightsForProfileAlignUPGMA(alignPtr, distMat, p1TreeName, p1Weights, p2TreeName,
+ p2Weights, numSeqs, profile1NumSeqs, useTree1, useTree2,
+ success);
+ }
+ else
+ {
+ getWeightsForProfileAlignNJ(alignPtr, distMat, p1TreeName, p1Weights, p2TreeName,
+ p2Weights, numSeqs, profile1NumSeqs, useTree1, useTree2,
+ success);
+ }
+}
+
+/**
+ * This function will be called by doAlignUseOldGuideTree
+ *
+ */
+auto_ptr<AlignmentSteps>
+TreeInterface::getWeightsAndStepsFromTree(Alignment* alignPtr,
+ DistMatrix* distMat, string* treeName,
+ vector<int>* seqWeights, int fSeq,
+ int numSeqs, bool* success)
+{
+ /**
+ * This will only use the NJ. It will not use UPGMA
+ */
+ return getWeightsAndStepsFromTreeNJ(alignPtr, distMat, treeName, seqWeights,
+ fSeq, numSeqs, success);
+}
+
+/**
+ * Called by sequencesAlignToProfile
+ */
+int
+TreeInterface::getWeightsFromGuideTree(Alignment* alignPtr, DistMatrix* distMat,
+ string* treeName, vector<int>* seqWeights, int fSeq,
+ int nSeqs, bool* success)
+{
+ /**
+ * This will only use the NJ. It will not use UPGMA
+ */
+ return getWeightsFromGuideTreeNJ(alignPtr, distMat, treeName, seqWeights, fSeq, nSeqs,
+ success);
+}
+
+/**
+ * This will be called by doGuideTreeOnly. This does not put the distMat into similarity.
+ *
+ */
+void
+TreeInterface::generateTreeFromDistMat(DistMatrix* distMat, Alignment *alignPtr,
+ int seq1, int nSeqs,
+ string* phylipName, bool* success)
+{
+ /**
+ * This function does not put distMat into similarities
+ */
+ if(userParameters->getClusterAlgorithm() == UPGMA)
+ {
+ RootedGuideTree guideTree;
+ generateTreeFromDistMatUPGMA(&guideTree, distMat, alignPtr, seq1, nSeqs,
+ phylipName, success);
+ }
+ else
+ {
+ generateTreeFromDistMatNJ(distMat, alignPtr, seq1, nSeqs, phylipName, success);
+ }
+}
+
+void
+TreeInterface::treeFromAlignment(TreeNames* treeNames, Alignment *alignPtr)
+{
+ if(userParameters->getClusterAlgorithm() == UPGMA)
+ {
+ RootedClusterTree clusterTree;
+ clusterTree.treeFromAlignment(treeNames, alignPtr);
+ }
+ else
+ {
+ UnRootedClusterTree clusterTree;
+ clusterTree.treeFromAlignment(treeNames, alignPtr);
+ }
+}
+
+void
+TreeInterface::bootstrapTree(TreeNames* treeNames, Alignment *alignPtr)
+{
+ /**
+ * NOTE We only do bootstrapping on the NJ tree.
+ */
+ UnRootedClusterTree clusterTree;
+ clusterTree.bootstrapTree(treeNames, alignPtr);
+}
+
+
+/**
+ * Private functions!!!!
+ *
+ */
+
+
+/** *******************
+ *
+ * Neighbour joining functions
+ */
+/**
+ * Note: After this function has been called the distance matrix will all be in similarities.
+ *
+ */
+auto_ptr<AlignmentSteps>
+TreeInterface::getWeightsAndStepsFromDistMatNJ(vector<int>* seqWeights, DistMatrix* distMat,
+ Alignment *alignPtr, int seq1, int nSeqs,
+ string* phylipName, bool* success)
+{
+ auto_ptr<AlignmentSteps> progSteps;
+ generateTreeFromDistMatNJ(distMat, alignPtr, seq1, nSeqs, phylipName, success);
+
+ progSteps = getWeightsAndStepsUseOldGuideTreeNJ(distMat, alignPtr, phylipName,
+ seqWeights, seq1, nSeqs, success);
+ return progSteps;
+}
+
+auto_ptr<AlignmentSteps>
+TreeInterface::getWeightsAndStepsUseOldGuideTreeNJ(DistMatrix* distMat, Alignment *alignPtr,
+ string* treeName, vector<int>* seqWeights,
+ int fSeq, int nSeqs, bool* success)
+{
+ Tree groupTree;
+ auto_ptr<AlignmentSteps> progSteps;
+
+ if(nSeqs == 1)
+ {
+ utilityObject->info("Only 1 sequence, cannot do multiple alignment\n");
+ *success = false;
+ return progSteps;
+ }
+
+ int status = 0;
+
+ status = readTreeAndCalcWeightsNJ(&groupTree, alignPtr, distMat, treeName,
+ seqWeights, fSeq, nSeqs);
+
+ if(status == 0)
+ {
+ *success = false;
+ return progSteps;
+ }
+
+ progSteps = groupTree.createSets(0, nSeqs);
+ int _numSteps = progSteps->getNumSteps();
+
+ utilityObject->info("There are %d groups", _numSteps);
+ // clear the memory used for the phylogenetic tree
+
+ if (nSeqs >= 2)
+ {
+ groupTree.clearTree(NULL);
+ }
+
+ *success = true;
+ return progSteps;
+}
+
+
+
+/**
+ * The function readTreeAndCalcWeightsNJ is used to read in the tree given the treeName.
+ * It then calls the appropriate functions to calc the seqWeights and make sure the matrix
+ * is in similarity mode.
+ */
+int
+TreeInterface::readTreeAndCalcWeightsNJ(Tree* groupTree, Alignment* alignPtr,
+ DistMatrix* distMat, string* treeName,
+ vector<int>* seqWeights, int fSeq, int nSeqs)
+{
+ int status = 0;
+ if (nSeqs >= 2)
+ {
+ status = groupTree->readTree(alignPtr, treeName->c_str(), fSeq - 1, nSeqs);
+ if (status == 0)
+ {
+ return status;
+ }
+ }
+
+ groupTree->calcSeqWeights(fSeq - 1, nSeqs, seqWeights);
+
+ status = groupTree->calcSimilarities(alignPtr, distMat);
+
+ return status;
+}
+
+int
+TreeInterface::getWeightsFromGuideTreeNJ(Alignment* alignPtr, DistMatrix* distMat,
+ string* treeName, vector<int>* seqWeights, int fSeq, int nSeqs,
+ bool* success)
+{
+ Tree groupTree;
+ int status = readTreeAndCalcWeightsNJ(&groupTree, alignPtr, distMat, treeName,
+ seqWeights, fSeq, nSeqs);
+ if(status == 0)
+ {
+ *success = false;
+ }
+ else
+ {
+ *success = true;
+ }
+ return status;
+}
+
+void
+TreeInterface::getWeightsFromDistMatNJ(vector<int>* seqWeights, DistMatrix* distMat,
+ Alignment *alignPtr, int seq1, int _nSeqs,
+ string* phylipName, bool* success)
+{
+ string copyOfPhylipName = string(*phylipName);
+ int nSeqs = _nSeqs;
+ int status = 0;
+
+ generateTreeFromDistMatNJ(distMat, alignPtr, seq1, nSeqs, phylipName, success);
+
+ Tree groupTree;
+ status = readTreeAndCalcWeightsNJ(&groupTree, alignPtr, distMat, phylipName,
+ seqWeights, seq1, nSeqs);
+ if(status == 0)
+ {
+ *success = false;
+ }
+ else
+ {
+ *success = true;
+ }
+}
+
+void
+TreeInterface::getWeightsForProfileAlignNJ(Alignment* alignPtr, DistMatrix* distMat,
+ string* p1TreeName, vector<int>* p1Weights, string* p2TreeName,
+ vector<int>* p2Weights, int numSeqs, int profile1NumSeqs, bool useTree1,
+ bool useTree2, bool* success)
+{
+ if(!useTree1)
+ {
+ if (profile1NumSeqs >= 2)
+ {
+ generateTreeFromDistMatNJ(distMat, alignPtr, 1, profile1NumSeqs, p1TreeName,
+ success);
+ }
+ }
+
+ if(!useTree2)
+ {
+ if(numSeqs - profile1NumSeqs >= 2)
+ {
+ generateTreeFromDistMatNJ(distMat, alignPtr, profile1NumSeqs + 1,
+ numSeqs - profile1NumSeqs, p2TreeName, success);
+ }
+ }
+
+ if (userParameters->getNewTree1File() || userParameters->getNewTree2File())
+ {
+ *success = false;
+ return;
+ }
+ // MSA->CALCPAIRWISE
+
+ MSA* msaObj = new MSA();
+
+ int count = msaObj->calcPairwiseForProfileAlign(alignPtr, distMat);
+
+ if (count == 0)
+ {
+ *success = false;
+ return;
+ }
+
+ Tree groupTree1, groupTree2;
+ int status = 0;
+
+ if (profile1NumSeqs >= 2)
+ {
+ status = groupTree1.readTree(alignPtr, p1TreeName->c_str(), 0, profile1NumSeqs);
+ if (status == 0)
+ {
+ *success = false;
+ return;
+ }
+ }
+
+ groupTree1.calcSeqWeights(0, profile1NumSeqs, p1Weights);
+
+ if (profile1NumSeqs >= 2)
+ {
+ groupTree1.clearTree(NULL);
+ }
+
+ if (numSeqs - profile1NumSeqs >= 2)
+ {
+ status = groupTree2.readTree(alignPtr, p2TreeName->c_str(), profile1NumSeqs, numSeqs);
+ if (status == 0)
+ {
+ *success = false;
+ return;
+ }
+ }
+
+ groupTree2.calcSeqWeights(profile1NumSeqs, numSeqs, p2Weights);
+
+
+ /* clear the memory for the phylogenetic tree */
+
+ if (numSeqs - profile1NumSeqs >= 2)
+ {
+ groupTree2.clearTree(NULL);
+ }
+
+ /**
+ * Convert distances to similarities!!!!!!
+ */
+ for (int i = 1; i < numSeqs; i++)
+ {
+ for (int j = i + 1; j <= numSeqs; j++)
+ {
+ (*distMat)(i, j) = 100.0 - (*distMat)(i, j) * 100.0;
+ (*distMat)(j, i) = (*distMat)(i, j);
+ }
+ }
+
+ *success = true;
+
+}
+
+void
+TreeInterface::generateTreeFromDistMatNJ(DistMatrix* distMat, Alignment *alignPtr,
+ int seq1, int nSeqs,
+ string* phylipName, bool* success)
+{
+ string copyOfPhylipName = string(*phylipName);
+
+ if (nSeqs >= 2)
+ {
+ UnRootedClusterTree* clusterTree = new UnRootedClusterTree;
+ clusterTree->treeFromDistMatrix(distMat, alignPtr, seq1, nSeqs, copyOfPhylipName);
+
+ *phylipName = copyOfPhylipName;
+ // AW: message outputted by OutputFile function
+ // utilityObject->info("Guide tree file created: [%s]",
+ // phylipName->c_str());
+ delete clusterTree;
+ }
+ *success = true;
+}
+
+auto_ptr<AlignmentSteps>
+TreeInterface::getWeightsAndStepsFromTreeNJ(Alignment* alignPtr,
+ DistMatrix* distMat, string* treeName,
+ vector<int>* seqWeights, int fSeq, int numSeqs,
+ bool* success)
+{
+ auto_ptr<AlignmentSteps> progSteps;
+ Tree groupTree;
+ if(numSeqs == 1)
+ {
+ utilityObject->info("Only 1 sequence, cannot do multiple alignment\n");
+ *success = false;
+ return progSteps;
+ }
+ int status;
+ status = readTreeAndCalcWeightsNJ(&groupTree, alignPtr, distMat, treeName,
+ seqWeights, fSeq, numSeqs);
+
+ if (status == 0)
+ {
+ *success = false;
+ return progSteps;
+ }
+
+ progSteps = groupTree.createSets(0, numSeqs);
+ int _numSteps = progSteps->getNumSteps();
+ utilityObject->info("There are %d groups", _numSteps);
+ // clear the memory used for the phylogenetic tree
+
+ if (numSeqs >= 2)
+ {
+ groupTree.clearTree(NULL);
+ }
+
+ *success = true;
+
+ return progSteps;
+
+}
+
+/**
+ * UPGMA functions
+ *
+ */
+
+
+auto_ptr<AlignmentSteps>
+TreeInterface::getWeightsAndStepsFromDistMatUPGMA(vector<int>* seqWeights,
+ DistMatrix* distMat, Alignment *alignPtr,
+ int seq1, int nSeqs, string* phylipName, bool* success)
+{
+ auto_ptr<AlignmentSteps> progSteps;
+ RootedGuideTree guideTree;
+
+ progSteps = generateTreeFromDistMatUPGMA(&guideTree, distMat, alignPtr, seq1, nSeqs,
+ phylipName, success);
+
+ guideTree.calcSeqWeights(0, nSeqs, seqWeights);
+
+ distMat->makeSimilarityMatrix();
+
+ return progSteps;
+}
+
+auto_ptr<AlignmentSteps> TreeInterface::generateTreeFromDistMatUPGMA(RootedGuideTree* guideTree, DistMatrix* distMat, Alignment *alignPtr, int seq1, int nSeqs,
+ string* phylipName, bool* success)
+{
+ auto_ptr<AlignmentSteps> progSteps;
+ string copyOfPhylipName = string(*phylipName);
+
+ if (nSeqs >= 2)
+ {
+ RootedClusterTree clusterTree;
+ progSteps = clusterTree.treeFromDistMatrix(guideTree, distMat, alignPtr, seq1,
+ nSeqs, copyOfPhylipName);
+
+ *phylipName = copyOfPhylipName;
+ // AW: message outputted by OutputFile function
+ // utilityObject->info("Guide tree file created: [%s]",
+ // phylipName->c_str());
+ }
+ *success = true;
+ return progSteps;
+}
+
+void TreeInterface::getWeightsFromDistMatUPGMA(vector<int>* seqWeights, DistMatrix* distMat,
+ Alignment *alignPtr, int seq1, int nSeqs,
+ string* phylipName, bool* success)
+{
+ getWeightsAndStepsFromDistMatUPGMA(seqWeights, distMat, alignPtr,
+ seq1, nSeqs, phylipName, success);
+}
+
+/**
+ * The function getWeightsForProfileAlignUPGMA is used to generate the sequence weights
+ * that will be used in a profile alignment. It also recalculates the distance matrix, and
+ * then returns it as a similarity matrix. This function uses the NJ code to read in a tree
+ * from a file if we are using a previous tree. This is because that part of the code is able
+ * to do the finding of the root etc.
+ */
+void TreeInterface::getWeightsForProfileAlignUPGMA(Alignment* alignPtr, DistMatrix* distMat,
+ string* p1TreeName, vector<int>* p1Weights, string* p2TreeName,
+ vector<int>* p2Weights, int numSeqs, int profile1NumSeqs, bool useTree1,
+ bool useTree2, bool* success)
+{
+ int status = 0;
+
+ if(useTree1)
+ {
+ // Use the code to read in the tree and get the seqWeights
+ Tree groupTree1;
+ if (profile1NumSeqs >= 2)
+ {
+ status = groupTree1.readTree(alignPtr, p1TreeName->c_str(), 0, profile1NumSeqs);
+ if (status == 0)
+ {
+ *success = false;
+ return;
+ }
+ }
+ groupTree1.calcSeqWeights(0, profile1NumSeqs, p1Weights);
+
+ if (profile1NumSeqs >= 2)
+ {
+ groupTree1.clearTree(NULL);
+ }
+ }
+ else
+ {
+ if (profile1NumSeqs >= 2)
+ {
+ RootedGuideTree guideTree;
+ generateTreeFromDistMatUPGMA(&guideTree, distMat, alignPtr, 1, profile1NumSeqs,
+ p1TreeName, success);
+ guideTree.calcSeqWeights(0, profile1NumSeqs, p1Weights);
+ }
+ }
+
+ if(useTree2)
+ {
+ Tree groupTree2;
+ if (numSeqs - profile1NumSeqs >= 2)
+ {
+ status = groupTree2.readTree(alignPtr, p2TreeName->c_str(),
+ profile1NumSeqs, numSeqs);
+ if (status == 0)
+ {
+ *success = false;
+ return;
+ }
+ }
+ groupTree2.calcSeqWeights(profile1NumSeqs, numSeqs, p2Weights);
+
+ if (numSeqs - profile1NumSeqs >= 2)
+ {
+ groupTree2.clearTree(NULL);
+ }
+ }
+ else
+ {
+ if(numSeqs - profile1NumSeqs >= 2)
+ {
+ RootedGuideTree guideTree;
+ generateTreeFromDistMatUPGMA(&guideTree, distMat, alignPtr, profile1NumSeqs + 1,
+ numSeqs - profile1NumSeqs, p2TreeName, success);
+ guideTree.calcSeqWeights(profile1NumSeqs, numSeqs, p2Weights);
+ }
+ }
+
+ if (userParameters->getNewTree1File() || userParameters->getNewTree2File())
+ {
+ *success = false;
+ return;
+ }
+
+ MSA* msaObj = new MSA();
+
+ int count = msaObj->calcPairwiseForProfileAlign(alignPtr, distMat);
+
+ delete msaObj;
+
+ if (count == 0)
+ {
+ *success = false;
+ return;
+ }
+
+ /**
+ * Convert distances to similarities!!!!!!
+ */
+ distMat->makeSimilarityMatrix();
+
+ *success = true;
+
+}
+
+/**
+ * Mark 10-5-2007: Bug fix # 42
+ * The following function is to be used only for calculating the weights for the Qt
+ * low scoring segments.
+ */
+void TreeInterface::getWeightsForQtLowScore(vector<int>* seqWeights, DistMatrix* distMat,
+ Alignment *alignPtr, int seq1, int nSeqs,
+ string* phylipName, bool* success)
+{
+ string copyOfPhylipName = string(*phylipName);
+ int _nSeqs = nSeqs;
+ int status = 0;
+
+ generateTreeFromDistMatNJ(distMat, alignPtr, seq1, _nSeqs, phylipName, success);
+
+ Tree groupTree;
+
+ status = 0;
+ if (_nSeqs >= 2)
+ {
+ status = groupTree.readTree(alignPtr, phylipName->c_str(), seq1 - 1,
+ seq1 + _nSeqs-1); // mark 22-5-07
+ if(status == 0)
+ {
+ *success = false;
+ return;
+ }
+ else
+ {
+ *success = true;
+ }
+ }
+
+ groupTree.calcSeqWeights(seq1 - 1, seq1 + _nSeqs - 1, seqWeights); // mark 23-5-07
+}
+
+}