4 * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson.
8 * Mark 10-5-2007: Bug fix # 42. Added getWeightsForQtLowScore function.
9 * Mark 22-5-2007: Made a change to getWeightsForQtLowScore
10 * Mark 23-5-2007: Made a change to getWeightsForQtLowScore
15 #include "TreeInterface.h"
17 #include "UnRootedClusterTree.h"
19 #include "../multipleAlign/MSA.h"
20 #include "../general/userparams.h"
21 #include "../general/debuglogObject.h"
22 #include "UPGMA/RootedGuideTree.h"
23 #include "UPGMA/RootedClusterTree.h"
28 * This will be called by align!
31 auto_ptr<AlignmentSteps>
32 TreeInterface::getWeightsAndStepsFromDistMat(vector<int>* seqWeights, DistMatrix* distMat,
33 Alignment *alignPtr, int seq1, int nSeqs,
34 string* phylipName, bool* success)
37 if(logObject && DEBUGLOG)
39 logObject->logMsg("In getWeightsAndStepsFromDistMat\n");
42 if(userParameters->getClusterAlgorithm() == UPGMA)
44 return getWeightsAndStepsFromDistMatUPGMA(seqWeights, distMat, alignPtr,
45 seq1, nSeqs, phylipName, success);
49 return getWeightsAndStepsFromDistMatNJ(seqWeights, distMat, alignPtr,
50 seq1, nSeqs, phylipName, success);
56 * This will be called by sequencesAlignToProfile. This function will put the distMat into
57 * a similarity matrix.
60 TreeInterface::getWeightsFromDistMat(vector<int>* seqWeights, DistMatrix* distMat,
61 Alignment *alignPtr, int seq1, int nSeqs,
62 string* phylipName, bool* success)
64 if(userParameters->getClusterAlgorithm() == UPGMA)
66 getWeightsFromDistMatUPGMA(seqWeights, distMat, alignPtr, seq1, nSeqs, phylipName,
71 getWeightsFromDistMatNJ(seqWeights, distMat, alignPtr, seq1, nSeqs, phylipName,
77 * This function will be called by profileAlign
80 TreeInterface::getWeightsForProfileAlign(Alignment* alignPtr, DistMatrix* distMat,
81 string* p1TreeName, vector<int>* p1Weights, string* p2TreeName,
82 vector<int>* p2Weights, int numSeqs, int profile1NumSeqs, bool useTree1,
83 bool useTree2, bool* success)
85 if(userParameters->getClusterAlgorithm() == UPGMA)
87 getWeightsForProfileAlignUPGMA(alignPtr, distMat, p1TreeName, p1Weights, p2TreeName,
88 p2Weights, numSeqs, profile1NumSeqs, useTree1, useTree2,
93 getWeightsForProfileAlignNJ(alignPtr, distMat, p1TreeName, p1Weights, p2TreeName,
94 p2Weights, numSeqs, profile1NumSeqs, useTree1, useTree2,
100 * This function will be called by doAlignUseOldGuideTree
103 auto_ptr<AlignmentSteps>
104 TreeInterface::getWeightsAndStepsFromTree(Alignment* alignPtr,
105 DistMatrix* distMat, string* treeName,
106 vector<int>* seqWeights, int fSeq,
107 int numSeqs, bool* success)
110 * This will only use the NJ. It will not use UPGMA
112 return getWeightsAndStepsFromTreeNJ(alignPtr, distMat, treeName, seqWeights,
113 fSeq, numSeqs, success);
117 * Called by sequencesAlignToProfile
120 TreeInterface::getWeightsFromGuideTree(Alignment* alignPtr, DistMatrix* distMat,
121 string* treeName, vector<int>* seqWeights, int fSeq,
122 int nSeqs, bool* success)
125 * This will only use the NJ. It will not use UPGMA
127 return getWeightsFromGuideTreeNJ(alignPtr, distMat, treeName, seqWeights, fSeq, nSeqs,
132 * This will be called by doGuideTreeOnly. This does not put the distMat into similarity.
136 TreeInterface::generateTreeFromDistMat(DistMatrix* distMat, Alignment *alignPtr,
138 string* phylipName, bool* success)
141 * This function does not put distMat into similarities
143 if(userParameters->getClusterAlgorithm() == UPGMA)
145 RootedGuideTree guideTree;
146 generateTreeFromDistMatUPGMA(&guideTree, distMat, alignPtr, seq1, nSeqs,
147 phylipName, success);
151 generateTreeFromDistMatNJ(distMat, alignPtr, seq1, nSeqs, phylipName, success);
156 TreeInterface::treeFromAlignment(TreeNames* treeNames, Alignment *alignPtr)
158 if(userParameters->getClusterAlgorithm() == UPGMA)
160 RootedClusterTree clusterTree;
161 clusterTree.treeFromAlignment(treeNames, alignPtr);
165 UnRootedClusterTree clusterTree;
166 clusterTree.treeFromAlignment(treeNames, alignPtr);
171 TreeInterface::bootstrapTree(TreeNames* treeNames, Alignment *alignPtr)
174 * NOTE We only do bootstrapping on the NJ tree.
176 UnRootedClusterTree clusterTree;
177 clusterTree.bootstrapTree(treeNames, alignPtr);
182 * Private functions!!!!
187 /** *******************
189 * Neighbour joining functions
192 * Note: After this function has been called the distance matrix will all be in similarities.
195 auto_ptr<AlignmentSteps>
196 TreeInterface::getWeightsAndStepsFromDistMatNJ(vector<int>* seqWeights, DistMatrix* distMat,
197 Alignment *alignPtr, int seq1, int nSeqs,
198 string* phylipName, bool* success)
200 auto_ptr<AlignmentSteps> progSteps;
201 generateTreeFromDistMatNJ(distMat, alignPtr, seq1, nSeqs, phylipName, success);
203 progSteps = getWeightsAndStepsUseOldGuideTreeNJ(distMat, alignPtr, phylipName,
204 seqWeights, seq1, nSeqs, success);
208 auto_ptr<AlignmentSteps>
209 TreeInterface::getWeightsAndStepsUseOldGuideTreeNJ(DistMatrix* distMat, Alignment *alignPtr,
210 string* treeName, vector<int>* seqWeights,
211 int fSeq, int nSeqs, bool* success)
214 auto_ptr<AlignmentSteps> progSteps;
218 utilityObject->info("Only 1 sequence, cannot do multiple alignment\n");
225 status = readTreeAndCalcWeightsNJ(&groupTree, alignPtr, distMat, treeName,
226 seqWeights, fSeq, nSeqs);
234 progSteps = groupTree.createSets(0, nSeqs);
235 int _numSteps = progSteps->getNumSteps();
237 utilityObject->info("There are %d groups", _numSteps);
238 // clear the memory used for the phylogenetic tree
242 groupTree.clearTree(NULL);
252 * The function readTreeAndCalcWeightsNJ is used to read in the tree given the treeName.
253 * It then calls the appropriate functions to calc the seqWeights and make sure the matrix
254 * is in similarity mode.
257 TreeInterface::readTreeAndCalcWeightsNJ(Tree* groupTree, Alignment* alignPtr,
258 DistMatrix* distMat, string* treeName,
259 vector<int>* seqWeights, int fSeq, int nSeqs)
264 status = groupTree->readTree(alignPtr, treeName->c_str(), fSeq - 1, nSeqs);
271 groupTree->calcSeqWeights(fSeq - 1, nSeqs, seqWeights);
273 status = groupTree->calcSimilarities(alignPtr, distMat);
279 TreeInterface::getWeightsFromGuideTreeNJ(Alignment* alignPtr, DistMatrix* distMat,
280 string* treeName, vector<int>* seqWeights, int fSeq, int nSeqs,
284 int status = readTreeAndCalcWeightsNJ(&groupTree, alignPtr, distMat, treeName,
285 seqWeights, fSeq, nSeqs);
298 TreeInterface::getWeightsFromDistMatNJ(vector<int>* seqWeights, DistMatrix* distMat,
299 Alignment *alignPtr, int seq1, int _nSeqs,
300 string* phylipName, bool* success)
302 string copyOfPhylipName = string(*phylipName);
306 generateTreeFromDistMatNJ(distMat, alignPtr, seq1, nSeqs, phylipName, success);
309 status = readTreeAndCalcWeightsNJ(&groupTree, alignPtr, distMat, phylipName,
310 seqWeights, seq1, nSeqs);
322 TreeInterface::getWeightsForProfileAlignNJ(Alignment* alignPtr, DistMatrix* distMat,
323 string* p1TreeName, vector<int>* p1Weights, string* p2TreeName,
324 vector<int>* p2Weights, int numSeqs, int profile1NumSeqs, bool useTree1,
325 bool useTree2, bool* success)
329 if (profile1NumSeqs >= 2)
331 generateTreeFromDistMatNJ(distMat, alignPtr, 1, profile1NumSeqs, p1TreeName,
338 if(numSeqs - profile1NumSeqs >= 2)
340 generateTreeFromDistMatNJ(distMat, alignPtr, profile1NumSeqs + 1,
341 numSeqs - profile1NumSeqs, p2TreeName, success);
345 if (userParameters->getNewTree1File() || userParameters->getNewTree2File())
352 MSA* msaObj = new MSA();
354 int count = msaObj->calcPairwiseForProfileAlign(alignPtr, distMat);
362 Tree groupTree1, groupTree2;
365 if (profile1NumSeqs >= 2)
367 status = groupTree1.readTree(alignPtr, p1TreeName->c_str(), 0, profile1NumSeqs);
375 groupTree1.calcSeqWeights(0, profile1NumSeqs, p1Weights);
377 if (profile1NumSeqs >= 2)
379 groupTree1.clearTree(NULL);
382 if (numSeqs - profile1NumSeqs >= 2)
384 status = groupTree2.readTree(alignPtr, p2TreeName->c_str(), profile1NumSeqs, numSeqs);
392 groupTree2.calcSeqWeights(profile1NumSeqs, numSeqs, p2Weights);
395 /* clear the memory for the phylogenetic tree */
397 if (numSeqs - profile1NumSeqs >= 2)
399 groupTree2.clearTree(NULL);
403 * Convert distances to similarities!!!!!!
405 for (int i = 1; i < numSeqs; i++)
407 for (int j = i + 1; j <= numSeqs; j++)
409 (*distMat)(i, j) = 100.0 - (*distMat)(i, j) * 100.0;
410 (*distMat)(j, i) = (*distMat)(i, j);
419 TreeInterface::generateTreeFromDistMatNJ(DistMatrix* distMat, Alignment *alignPtr,
421 string* phylipName, bool* success)
423 string copyOfPhylipName = string(*phylipName);
427 UnRootedClusterTree* clusterTree = new UnRootedClusterTree;
428 clusterTree->treeFromDistMatrix(distMat, alignPtr, seq1, nSeqs, copyOfPhylipName);
430 *phylipName = copyOfPhylipName;
431 // AW: message outputted by OutputFile function
432 // utilityObject->info("Guide tree file created: [%s]",
433 // phylipName->c_str());
439 auto_ptr<AlignmentSteps>
440 TreeInterface::getWeightsAndStepsFromTreeNJ(Alignment* alignPtr,
441 DistMatrix* distMat, string* treeName,
442 vector<int>* seqWeights, int fSeq, int numSeqs,
445 auto_ptr<AlignmentSteps> progSteps;
449 utilityObject->info("Only 1 sequence, cannot do multiple alignment\n");
454 status = readTreeAndCalcWeightsNJ(&groupTree, alignPtr, distMat, treeName,
455 seqWeights, fSeq, numSeqs);
463 progSteps = groupTree.createSets(0, numSeqs);
464 int _numSteps = progSteps->getNumSteps();
465 utilityObject->info("There are %d groups", _numSteps);
466 // clear the memory used for the phylogenetic tree
470 groupTree.clearTree(NULL);
485 auto_ptr<AlignmentSteps>
486 TreeInterface::getWeightsAndStepsFromDistMatUPGMA(vector<int>* seqWeights,
487 DistMatrix* distMat, Alignment *alignPtr,
488 int seq1, int nSeqs, string* phylipName, bool* success)
490 auto_ptr<AlignmentSteps> progSteps;
491 RootedGuideTree guideTree;
493 progSteps = generateTreeFromDistMatUPGMA(&guideTree, distMat, alignPtr, seq1, nSeqs,
494 phylipName, success);
496 guideTree.calcSeqWeights(0, nSeqs, seqWeights);
498 distMat->makeSimilarityMatrix();
503 auto_ptr<AlignmentSteps> TreeInterface::generateTreeFromDistMatUPGMA(RootedGuideTree* guideTree, DistMatrix* distMat, Alignment *alignPtr, int seq1, int nSeqs,
504 string* phylipName, bool* success)
506 auto_ptr<AlignmentSteps> progSteps;
507 string copyOfPhylipName = string(*phylipName);
511 RootedClusterTree clusterTree;
512 progSteps = clusterTree.treeFromDistMatrix(guideTree, distMat, alignPtr, seq1,
513 nSeqs, copyOfPhylipName);
515 *phylipName = copyOfPhylipName;
516 // AW: message outputted by OutputFile function
517 // utilityObject->info("Guide tree file created: [%s]",
518 // phylipName->c_str());
524 void TreeInterface::getWeightsFromDistMatUPGMA(vector<int>* seqWeights, DistMatrix* distMat,
525 Alignment *alignPtr, int seq1, int nSeqs,
526 string* phylipName, bool* success)
528 getWeightsAndStepsFromDistMatUPGMA(seqWeights, distMat, alignPtr,
529 seq1, nSeqs, phylipName, success);
533 * The function getWeightsForProfileAlignUPGMA is used to generate the sequence weights
534 * that will be used in a profile alignment. It also recalculates the distance matrix, and
535 * then returns it as a similarity matrix. This function uses the NJ code to read in a tree
536 * from a file if we are using a previous tree. This is because that part of the code is able
537 * to do the finding of the root etc.
539 void TreeInterface::getWeightsForProfileAlignUPGMA(Alignment* alignPtr, DistMatrix* distMat,
540 string* p1TreeName, vector<int>* p1Weights, string* p2TreeName,
541 vector<int>* p2Weights, int numSeqs, int profile1NumSeqs, bool useTree1,
542 bool useTree2, bool* success)
548 // Use the code to read in the tree and get the seqWeights
550 if (profile1NumSeqs >= 2)
552 status = groupTree1.readTree(alignPtr, p1TreeName->c_str(), 0, profile1NumSeqs);
559 groupTree1.calcSeqWeights(0, profile1NumSeqs, p1Weights);
561 if (profile1NumSeqs >= 2)
563 groupTree1.clearTree(NULL);
568 if (profile1NumSeqs >= 2)
570 RootedGuideTree guideTree;
571 generateTreeFromDistMatUPGMA(&guideTree, distMat, alignPtr, 1, profile1NumSeqs,
572 p1TreeName, success);
573 guideTree.calcSeqWeights(0, profile1NumSeqs, p1Weights);
580 if (numSeqs - profile1NumSeqs >= 2)
582 status = groupTree2.readTree(alignPtr, p2TreeName->c_str(),
583 profile1NumSeqs, numSeqs);
590 groupTree2.calcSeqWeights(profile1NumSeqs, numSeqs, p2Weights);
592 if (numSeqs - profile1NumSeqs >= 2)
594 groupTree2.clearTree(NULL);
599 if(numSeqs - profile1NumSeqs >= 2)
601 RootedGuideTree guideTree;
602 generateTreeFromDistMatUPGMA(&guideTree, distMat, alignPtr, profile1NumSeqs + 1,
603 numSeqs - profile1NumSeqs, p2TreeName, success);
604 guideTree.calcSeqWeights(profile1NumSeqs, numSeqs, p2Weights);
608 if (userParameters->getNewTree1File() || userParameters->getNewTree2File())
614 MSA* msaObj = new MSA();
616 int count = msaObj->calcPairwiseForProfileAlign(alignPtr, distMat);
627 * Convert distances to similarities!!!!!!
629 distMat->makeSimilarityMatrix();
636 * Mark 10-5-2007: Bug fix # 42
637 * The following function is to be used only for calculating the weights for the Qt
638 * low scoring segments.
640 void TreeInterface::getWeightsForQtLowScore(vector<int>* seqWeights, DistMatrix* distMat,
641 Alignment *alignPtr, int seq1, int nSeqs,
642 string* phylipName, bool* success)
644 string copyOfPhylipName = string(*phylipName);
648 generateTreeFromDistMatNJ(distMat, alignPtr, seq1, _nSeqs, phylipName, success);
655 status = groupTree.readTree(alignPtr, phylipName->c_str(), seq1 - 1,
656 seq1 + _nSeqs-1); // mark 22-5-07
668 groupTree.calcSeqWeights(seq1 - 1, seq1 + _nSeqs - 1, seqWeights); // mark 23-5-07