/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ /** * Changes: * Mark 10-5-2007: Bug fix # 42. Added getWeightsForQtLowScore function. * Mark 22-5-2007: Made a change to getWeightsForQtLowScore * Mark 23-5-2007: Made a change to getWeightsForQtLowScore */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include "TreeInterface.h" #include #include "UnRootedClusterTree.h" #include "Tree.h" #include "../multipleAlign/MSA.h" #include "../general/userparams.h" #include "../general/debuglogObject.h" #include "UPGMA/RootedGuideTree.h" #include "UPGMA/RootedClusterTree.h" namespace clustalw { /** * This will be called by align! * */ auto_ptr TreeInterface::getWeightsAndStepsFromDistMat(vector* seqWeights, DistMatrix* distMat, Alignment *alignPtr, int seq1, int nSeqs, string* phylipName, bool* success) { #if DEBUGFULL if(logObject && DEBUGLOG) { logObject->logMsg("In getWeightsAndStepsFromDistMat\n"); } #endif if(userParameters->getClusterAlgorithm() == UPGMA) { return getWeightsAndStepsFromDistMatUPGMA(seqWeights, distMat, alignPtr, seq1, nSeqs, phylipName, success); } else { return getWeightsAndStepsFromDistMatNJ(seqWeights, distMat, alignPtr, seq1, nSeqs, phylipName, success); } } /** * This will be called by sequencesAlignToProfile. This function will put the distMat into * a similarity matrix. */ void TreeInterface::getWeightsFromDistMat(vector* seqWeights, DistMatrix* distMat, Alignment *alignPtr, int seq1, int nSeqs, string* phylipName, bool* success) { if(userParameters->getClusterAlgorithm() == UPGMA) { getWeightsFromDistMatUPGMA(seqWeights, distMat, alignPtr, seq1, nSeqs, phylipName, success); } else { getWeightsFromDistMatNJ(seqWeights, distMat, alignPtr, seq1, nSeqs, phylipName, success); } } /** * This function will be called by profileAlign */ void TreeInterface::getWeightsForProfileAlign(Alignment* alignPtr, DistMatrix* distMat, string* p1TreeName, vector* p1Weights, string* p2TreeName, vector* p2Weights, int numSeqs, int profile1NumSeqs, bool useTree1, bool useTree2, bool* success) { if(userParameters->getClusterAlgorithm() == UPGMA) { getWeightsForProfileAlignUPGMA(alignPtr, distMat, p1TreeName, p1Weights, p2TreeName, p2Weights, numSeqs, profile1NumSeqs, useTree1, useTree2, success); } else { getWeightsForProfileAlignNJ(alignPtr, distMat, p1TreeName, p1Weights, p2TreeName, p2Weights, numSeqs, profile1NumSeqs, useTree1, useTree2, success); } } /** * This function will be called by doAlignUseOldGuideTree * */ auto_ptr TreeInterface::getWeightsAndStepsFromTree(Alignment* alignPtr, DistMatrix* distMat, string* treeName, vector* seqWeights, int fSeq, int numSeqs, bool* success) { /** * This will only use the NJ. It will not use UPGMA */ return getWeightsAndStepsFromTreeNJ(alignPtr, distMat, treeName, seqWeights, fSeq, numSeqs, success); } /** * Called by sequencesAlignToProfile */ int TreeInterface::getWeightsFromGuideTree(Alignment* alignPtr, DistMatrix* distMat, string* treeName, vector* seqWeights, int fSeq, int nSeqs, bool* success) { /** * This will only use the NJ. It will not use UPGMA */ return getWeightsFromGuideTreeNJ(alignPtr, distMat, treeName, seqWeights, fSeq, nSeqs, success); } /** * This will be called by doGuideTreeOnly. This does not put the distMat into similarity. * */ void TreeInterface::generateTreeFromDistMat(DistMatrix* distMat, Alignment *alignPtr, int seq1, int nSeqs, string* phylipName, bool* success) { /** * This function does not put distMat into similarities */ if(userParameters->getClusterAlgorithm() == UPGMA) { RootedGuideTree guideTree; generateTreeFromDistMatUPGMA(&guideTree, distMat, alignPtr, seq1, nSeqs, phylipName, success); } else { generateTreeFromDistMatNJ(distMat, alignPtr, seq1, nSeqs, phylipName, success); } } void TreeInterface::treeFromAlignment(TreeNames* treeNames, Alignment *alignPtr) { if(userParameters->getClusterAlgorithm() == UPGMA) { RootedClusterTree clusterTree; clusterTree.treeFromAlignment(treeNames, alignPtr); } else { UnRootedClusterTree clusterTree; clusterTree.treeFromAlignment(treeNames, alignPtr); } } void TreeInterface::bootstrapTree(TreeNames* treeNames, Alignment *alignPtr) { /** * NOTE We only do bootstrapping on the NJ tree. */ UnRootedClusterTree clusterTree; clusterTree.bootstrapTree(treeNames, alignPtr); } /** * Private functions!!!! * */ /** ******************* * * Neighbour joining functions */ /** * Note: After this function has been called the distance matrix will all be in similarities. * */ auto_ptr TreeInterface::getWeightsAndStepsFromDistMatNJ(vector* seqWeights, DistMatrix* distMat, Alignment *alignPtr, int seq1, int nSeqs, string* phylipName, bool* success) { auto_ptr progSteps; generateTreeFromDistMatNJ(distMat, alignPtr, seq1, nSeqs, phylipName, success); progSteps = getWeightsAndStepsUseOldGuideTreeNJ(distMat, alignPtr, phylipName, seqWeights, seq1, nSeqs, success); return progSteps; } auto_ptr TreeInterface::getWeightsAndStepsUseOldGuideTreeNJ(DistMatrix* distMat, Alignment *alignPtr, string* treeName, vector* seqWeights, int fSeq, int nSeqs, bool* success) { Tree groupTree; auto_ptr progSteps; if(nSeqs == 1) { utilityObject->info("Only 1 sequence, cannot do multiple alignment\n"); *success = false; return progSteps; } int status = 0; status = readTreeAndCalcWeightsNJ(&groupTree, alignPtr, distMat, treeName, seqWeights, fSeq, nSeqs); if(status == 0) { *success = false; return progSteps; } progSteps = groupTree.createSets(0, nSeqs); int _numSteps = progSteps->getNumSteps(); utilityObject->info("There are %d groups", _numSteps); // clear the memory used for the phylogenetic tree if (nSeqs >= 2) { groupTree.clearTree(NULL); } *success = true; return progSteps; } /** * The function readTreeAndCalcWeightsNJ is used to read in the tree given the treeName. * It then calls the appropriate functions to calc the seqWeights and make sure the matrix * is in similarity mode. */ int TreeInterface::readTreeAndCalcWeightsNJ(Tree* groupTree, Alignment* alignPtr, DistMatrix* distMat, string* treeName, vector* seqWeights, int fSeq, int nSeqs) { int status = 0; if (nSeqs >= 2) { status = groupTree->readTree(alignPtr, treeName->c_str(), fSeq - 1, nSeqs); if (status == 0) { return status; } } groupTree->calcSeqWeights(fSeq - 1, nSeqs, seqWeights); status = groupTree->calcSimilarities(alignPtr, distMat); return status; } int TreeInterface::getWeightsFromGuideTreeNJ(Alignment* alignPtr, DistMatrix* distMat, string* treeName, vector* seqWeights, int fSeq, int nSeqs, bool* success) { Tree groupTree; int status = readTreeAndCalcWeightsNJ(&groupTree, alignPtr, distMat, treeName, seqWeights, fSeq, nSeqs); if(status == 0) { *success = false; } else { *success = true; } return status; } void TreeInterface::getWeightsFromDistMatNJ(vector* seqWeights, DistMatrix* distMat, Alignment *alignPtr, int seq1, int _nSeqs, string* phylipName, bool* success) { string copyOfPhylipName = string(*phylipName); int nSeqs = _nSeqs; int status = 0; generateTreeFromDistMatNJ(distMat, alignPtr, seq1, nSeqs, phylipName, success); Tree groupTree; status = readTreeAndCalcWeightsNJ(&groupTree, alignPtr, distMat, phylipName, seqWeights, seq1, nSeqs); if(status == 0) { *success = false; } else { *success = true; } } void TreeInterface::getWeightsForProfileAlignNJ(Alignment* alignPtr, DistMatrix* distMat, string* p1TreeName, vector* p1Weights, string* p2TreeName, vector* p2Weights, int numSeqs, int profile1NumSeqs, bool useTree1, bool useTree2, bool* success) { if(!useTree1) { if (profile1NumSeqs >= 2) { generateTreeFromDistMatNJ(distMat, alignPtr, 1, profile1NumSeqs, p1TreeName, success); } } if(!useTree2) { if(numSeqs - profile1NumSeqs >= 2) { generateTreeFromDistMatNJ(distMat, alignPtr, profile1NumSeqs + 1, numSeqs - profile1NumSeqs, p2TreeName, success); } } if (userParameters->getNewTree1File() || userParameters->getNewTree2File()) { *success = false; return; } // MSA->CALCPAIRWISE MSA* msaObj = new MSA(); int count = msaObj->calcPairwiseForProfileAlign(alignPtr, distMat); if (count == 0) { *success = false; return; } Tree groupTree1, groupTree2; int status = 0; if (profile1NumSeqs >= 2) { status = groupTree1.readTree(alignPtr, p1TreeName->c_str(), 0, profile1NumSeqs); if (status == 0) { *success = false; return; } } groupTree1.calcSeqWeights(0, profile1NumSeqs, p1Weights); if (profile1NumSeqs >= 2) { groupTree1.clearTree(NULL); } if (numSeqs - profile1NumSeqs >= 2) { status = groupTree2.readTree(alignPtr, p2TreeName->c_str(), profile1NumSeqs, numSeqs); if (status == 0) { *success = false; return; } } groupTree2.calcSeqWeights(profile1NumSeqs, numSeqs, p2Weights); /* clear the memory for the phylogenetic tree */ if (numSeqs - profile1NumSeqs >= 2) { groupTree2.clearTree(NULL); } /** * Convert distances to similarities!!!!!! */ for (int i = 1; i < numSeqs; i++) { for (int j = i + 1; j <= numSeqs; j++) { (*distMat)(i, j) = 100.0 - (*distMat)(i, j) * 100.0; (*distMat)(j, i) = (*distMat)(i, j); } } *success = true; } void TreeInterface::generateTreeFromDistMatNJ(DistMatrix* distMat, Alignment *alignPtr, int seq1, int nSeqs, string* phylipName, bool* success) { string copyOfPhylipName = string(*phylipName); if (nSeqs >= 2) { UnRootedClusterTree* clusterTree = new UnRootedClusterTree; clusterTree->treeFromDistMatrix(distMat, alignPtr, seq1, nSeqs, copyOfPhylipName); *phylipName = copyOfPhylipName; // AW: message outputted by OutputFile function // utilityObject->info("Guide tree file created: [%s]", // phylipName->c_str()); delete clusterTree; } *success = true; } auto_ptr TreeInterface::getWeightsAndStepsFromTreeNJ(Alignment* alignPtr, DistMatrix* distMat, string* treeName, vector* seqWeights, int fSeq, int numSeqs, bool* success) { auto_ptr progSteps; Tree groupTree; if(numSeqs == 1) { utilityObject->info("Only 1 sequence, cannot do multiple alignment\n"); *success = false; return progSteps; } int status; status = readTreeAndCalcWeightsNJ(&groupTree, alignPtr, distMat, treeName, seqWeights, fSeq, numSeqs); if (status == 0) { *success = false; return progSteps; } progSteps = groupTree.createSets(0, numSeqs); int _numSteps = progSteps->getNumSteps(); utilityObject->info("There are %d groups", _numSteps); // clear the memory used for the phylogenetic tree if (numSeqs >= 2) { groupTree.clearTree(NULL); } *success = true; return progSteps; } /** * UPGMA functions * */ auto_ptr TreeInterface::getWeightsAndStepsFromDistMatUPGMA(vector* seqWeights, DistMatrix* distMat, Alignment *alignPtr, int seq1, int nSeqs, string* phylipName, bool* success) { auto_ptr progSteps; RootedGuideTree guideTree; progSteps = generateTreeFromDistMatUPGMA(&guideTree, distMat, alignPtr, seq1, nSeqs, phylipName, success); guideTree.calcSeqWeights(0, nSeqs, seqWeights); distMat->makeSimilarityMatrix(); return progSteps; } auto_ptr TreeInterface::generateTreeFromDistMatUPGMA(RootedGuideTree* guideTree, DistMatrix* distMat, Alignment *alignPtr, int seq1, int nSeqs, string* phylipName, bool* success) { auto_ptr progSteps; string copyOfPhylipName = string(*phylipName); if (nSeqs >= 2) { RootedClusterTree clusterTree; progSteps = clusterTree.treeFromDistMatrix(guideTree, distMat, alignPtr, seq1, nSeqs, copyOfPhylipName); *phylipName = copyOfPhylipName; // AW: message outputted by OutputFile function // utilityObject->info("Guide tree file created: [%s]", // phylipName->c_str()); } *success = true; return progSteps; } void TreeInterface::getWeightsFromDistMatUPGMA(vector* seqWeights, DistMatrix* distMat, Alignment *alignPtr, int seq1, int nSeqs, string* phylipName, bool* success) { getWeightsAndStepsFromDistMatUPGMA(seqWeights, distMat, alignPtr, seq1, nSeqs, phylipName, success); } /** * The function getWeightsForProfileAlignUPGMA is used to generate the sequence weights * that will be used in a profile alignment. It also recalculates the distance matrix, and * then returns it as a similarity matrix. This function uses the NJ code to read in a tree * from a file if we are using a previous tree. This is because that part of the code is able * to do the finding of the root etc. */ void TreeInterface::getWeightsForProfileAlignUPGMA(Alignment* alignPtr, DistMatrix* distMat, string* p1TreeName, vector* p1Weights, string* p2TreeName, vector* p2Weights, int numSeqs, int profile1NumSeqs, bool useTree1, bool useTree2, bool* success) { int status = 0; if(useTree1) { // Use the code to read in the tree and get the seqWeights Tree groupTree1; if (profile1NumSeqs >= 2) { status = groupTree1.readTree(alignPtr, p1TreeName->c_str(), 0, profile1NumSeqs); if (status == 0) { *success = false; return; } } groupTree1.calcSeqWeights(0, profile1NumSeqs, p1Weights); if (profile1NumSeqs >= 2) { groupTree1.clearTree(NULL); } } else { if (profile1NumSeqs >= 2) { RootedGuideTree guideTree; generateTreeFromDistMatUPGMA(&guideTree, distMat, alignPtr, 1, profile1NumSeqs, p1TreeName, success); guideTree.calcSeqWeights(0, profile1NumSeqs, p1Weights); } } if(useTree2) { Tree groupTree2; if (numSeqs - profile1NumSeqs >= 2) { status = groupTree2.readTree(alignPtr, p2TreeName->c_str(), profile1NumSeqs, numSeqs); if (status == 0) { *success = false; return; } } groupTree2.calcSeqWeights(profile1NumSeqs, numSeqs, p2Weights); if (numSeqs - profile1NumSeqs >= 2) { groupTree2.clearTree(NULL); } } else { if(numSeqs - profile1NumSeqs >= 2) { RootedGuideTree guideTree; generateTreeFromDistMatUPGMA(&guideTree, distMat, alignPtr, profile1NumSeqs + 1, numSeqs - profile1NumSeqs, p2TreeName, success); guideTree.calcSeqWeights(profile1NumSeqs, numSeqs, p2Weights); } } if (userParameters->getNewTree1File() || userParameters->getNewTree2File()) { *success = false; return; } MSA* msaObj = new MSA(); int count = msaObj->calcPairwiseForProfileAlign(alignPtr, distMat); delete msaObj; if (count == 0) { *success = false; return; } /** * Convert distances to similarities!!!!!! */ distMat->makeSimilarityMatrix(); *success = true; } /** * Mark 10-5-2007: Bug fix # 42 * The following function is to be used only for calculating the weights for the Qt * low scoring segments. */ void TreeInterface::getWeightsForQtLowScore(vector* seqWeights, DistMatrix* distMat, Alignment *alignPtr, int seq1, int nSeqs, string* phylipName, bool* success) { string copyOfPhylipName = string(*phylipName); int _nSeqs = nSeqs; int status = 0; generateTreeFromDistMatNJ(distMat, alignPtr, seq1, _nSeqs, phylipName, success); Tree groupTree; status = 0; if (_nSeqs >= 2) { status = groupTree.readTree(alignPtr, phylipName->c_str(), seq1 - 1, seq1 + _nSeqs-1); // mark 22-5-07 if(status == 0) { *success = false; return; } else { *success = true; } } groupTree.calcSeqWeights(seq1 - 1, seq1 + _nSeqs - 1, seqWeights); // mark 23-5-07 } }