4 * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson.
9 #include "UnRootedClusterTree.h"
10 #include "../general/utils.h"
11 #include "RandomGenerator.h"
14 #include "../general/OutputFile.h"
18 UnRootedClusterTree::UnRootedClusterTree()
23 * The function treeFromAlignment is called to generate a tree from an alignment
24 * without a distance matrix. It calculates the distance matrix from the alignment
28 // Note this function was called phylogenetic_tree before
29 void UnRootedClusterTree::treeFromAlignment(TreeNames* treeNames, Alignment *alignPtr)
33 OutputFile phylipPhyTreeFile;
34 OutputFile clustalPhyTreeFile;
35 OutputFile distancesPhyTreeFile;
36 OutputFile nexusPhyTreeFile;
43 numSeqs = alignPtr->getNumSeqs(); // NOTE class variable
46 * Check if numSeqs is ok
48 if(!checkIfConditionsMet(numSeqs, 2))
55 phyloTree = new PhyloTree;
57 // The SeqInfo struct is passed to reduce the number of parameters passed!
59 info.firstSeq = firstSeq;
60 info.lastSeq = lastSeq;
61 info.numSeqs = numSeqs;
63 outputTree = new ClusterTreeOutput(&info, 0); // No bootstrap!
65 phyloTree->treeDesc.resize(numSeqs + 1, vector<int>(numSeqs + 1));
67 TreeGroups saveTree(numSeqs + 1, vector<int>(numSeqs + 1));
69 // NOTE at the moment there is only one type of clustering algorithm, but there will
71 clusAlgorithm = new NJTree();
73 utilityObject->getPath(userParameters->getSeqName(), &path);
76 * Open the required output files.
78 if(!openFilesForTreeFromAlignment(&clustalPhyTreeFile, &phylipPhyTreeFile,
79 &distancesPhyTreeFile, &nexusPhyTreeFile, &pimFile, treeNames, &path))
81 return; // Problem opeing one of the files, cannot continue!
84 int _lenFirstSeq = alignPtr->getSeqLength(firstSeq);
86 bootPositions.resize(_lenFirstSeq + 2);
88 for (j = 1; j <= _lenFirstSeq; ++j)
94 * Calculate quickDist and overspill
96 overspill = calcQuickDistMatForAll(clustalPhyTreeFile.getPtrToFile(),
97 phylipPhyTreeFile.getPtrToFile(), nexusPhyTreeFile.getPtrToFile(),
98 pimFile.getPtrToFile(), distancesPhyTreeFile.getPtrToFile(), alignPtr);
100 // check if any distances overflowed the distance corrections
103 totalDists = (numSeqs *(numSeqs - 1)) / 2;
104 overspillMessage(overspill, totalDists);
107 if (userParameters->getOutputTreeClustal())
111 // Turn on file output
114 if (userParameters->getOutputTreeClustal() || userParameters->getOutputTreePhylip()
115 || userParameters->getOutputTreeNexus())
118 clusAlgorithm->setVerbose(true);
119 clusAlgorithm->generateTree(phyloTree, quickDistMat.get(), &info,
120 clustalPhyTreeFile.getPtrToFile());
121 clusAlgorithm->setVerbose(false);
124 for (i = 1; i < numSeqs + 1; i++)
125 for (j = 1; j < numSeqs + 1; j++)
127 saveTree[i][j] = phyloTree->treeDesc[i][j];
130 if (userParameters->getOutputTreePhylip())
132 outputTree->printPhylipTree(phyloTree, phylipPhyTreeFile.getPtrToFile(), alignPtr,
133 quickDistMat.get(), &bootTotals);
136 for (i = 1; i < numSeqs + 1; i++)
137 for (j = 1; j < numSeqs + 1; j++)
139 phyloTree->treeDesc[i][j] = saveTree[i][j];
142 if (userParameters->getOutputTreeNexus())
144 outputTree->printNexusTree(phyloTree, nexusPhyTreeFile.getPtrToFile(), alignPtr,
145 quickDistMat.get(), &bootTotals);
148 /** Free up resources!!!!! */
151 bootPositions.clear();
153 delete clusAlgorithm;
157 catch(const exception& ex)
159 cerr << ex.what() << endl;
160 utilityObject->error("Terminating program. Cannot continue\n");
166 * Routine for producing unrooted NJ trees from seperately aligned
167 * pairwise distances. This produces the GUIDE DENDROGRAMS in
170 void UnRootedClusterTree::treeFromDistMatrix(DistMatrix* distMat,
171 Alignment *alignPtr, int seq1, int nSeqs,
174 OutputFile phylipPhyTreeFile;
178 // Test to see if the inputs are valid
179 if(seq1 < 1 || nSeqs < 1)
181 cerr << "Invalid inputs into treeFromDistMatrix \n"
182 << "seq1 = " << seq1 << " nSeqs = " << nSeqs << "\n"
183 << "Need to end program!\n";
192 lastSeq = firstSeq + nSeqs - 1;
195 info.firstSeq = firstSeq;
196 info.lastSeq = lastSeq;
197 info.numSeqs = nSeqs;
198 // Open up the phylipPhyTreeFile now!
200 // NOTE that this is not exactly correct. This may cause a problem when outputing
201 // a tree for each of the profiles. But then we can pass it in, maybe.
202 utilityObject->getPath(userParameters->getSeqName(), &path);
206 string name = phylipName;
207 if(!phylipPhyTreeFile.openFile(&name,
208 "\nEnter name for new GUIDE TREE file ", &path, "dnd",
220 // Not sure about bootstrapping here!
221 clusAlgorithm = new NJTree();
222 outputTree = new ClusterTreeOutput(&info, 0);
224 ofstream* ptrToFile = phylipPhyTreeFile.getPtrToFile();
228 dist = (*distMat)(firstSeq, firstSeq + 1) / 2.0;
229 if(ptrToFile->is_open())
231 (*ptrToFile) << "(" << alignPtr->getName(firstSeq) << ":"
233 << dist << "," << alignPtr->getName(firstSeq + 1) << ":"
234 << setprecision(5) << dist <<");\n";
239 int dimensions = lastSeq - firstSeq + 2;
240 phyloTree.treeDesc.resize(dimensions, vector<int>(dimensions));
241 /* debugging, also used when -OUTPUTTREE is used */
243 ofstream debuglog("debug.treeFromDistMatrix.txt", ios::out);
244 clusAlgorithm->setVerbose(true);
245 clusAlgorithm->generateTree(&phyloTree, distMat, &info, &debuglog);
247 clusAlgorithm->generateTree(&phyloTree, distMat, &info);
250 outputTree->printPhylipTree(&phyloTree, ptrToFile, alignPtr,
251 distMat, &bootTotals);
253 delete clusAlgorithm;
256 //phylipPhyTreeFile.close();
258 catch(const exception &ex)
260 cerr << "ERROR: Error has occured in treeFromDistMatrix. "
261 << "Need to terminate program.\n"
267 cerr << "ERROR: Error has occured in treeFromDistMatrix. "
268 << "Need to terminate program.\n";
274 * Note before I had this function was accepting a distance matrix. But I think it
275 * just uses the quickly generated one. So it doesnt need it anymore. But be careful !!!!
277 void UnRootedClusterTree::bootstrapTree(TreeNames* treeNames, Alignment *alignPtr)
283 OutputFile clustalPhyTreeFile;
284 ofstream* ptrToClustalFile;
285 OutputFile phylipPhyTreeFile;
286 OutputFile nexusPhyTreeFile;
290 phyloTree = new PhyloTree;
291 PhyloTree sampleTree;
292 PhyloTree standardTree;
294 int totalDists, overspill = 0, totalOverspill = 0;
296 numSeqs = alignPtr->getNumSeqs();
299 clusAlgorithm = new NJTree();
302 info.firstSeq = firstSeq;
303 info.lastSeq = lastSeq;
304 info.numSeqs = numSeqs;
307 * Check if numSeqs is ok
309 if(!checkIfConditionsMet(numSeqs, 4))
314 if (!userParameters->getOutputTreeClustal() && !userParameters->getOutputTreePhylip()
315 && !userParameters->getOutputTreeNexus())
317 utilityObject->error("You must select either clustal or phylip or nexus tree output format");
321 utilityObject->getPath(userParameters->getSeqName(), &path);
323 if(!openFilesForBootstrap(&clustalPhyTreeFile, &phylipPhyTreeFile, &nexusPhyTreeFile,
326 return; // There was a problem opening the output files.
329 int _lenFirstSeq = alignPtr->getSeqLength(firstSeq);
331 bootTotals.resize(numSeqs + 1);
332 bootPositions.clear();
333 bootPositions.resize(_lenFirstSeq + 2);
335 for (j = 1; j <= _lenFirstSeq; ++j)
336 // First select all positions for
338 bootPositions[j] = j;
341 // the "standard" tree
342 overspill = calcQuickDistMatForSubSet(clustalPhyTreeFile.getPtrToFile(),
343 phylipPhyTreeFile.getPtrToFile(), nexusPhyTreeFile.getPtrToFile(), alignPtr);
345 // check if any distances overflowed the distance corrections
348 totalDists = (numSeqs *(numSeqs - 1)) / 2;
349 overspillMessage(overspill, totalDists);
354 if (userParameters->getOutputTreeClustal())
358 // Turn on screen output
359 phyloTree->treeDesc.resize(numSeqs + 1, vector<int>(numSeqs + 1));
361 // compute the standard tree
363 if (userParameters->getOutputTreeClustal() || userParameters->getOutputTreePhylip() ||
364 userParameters->getOutputTreeNexus())
366 clusAlgorithm->setVerbose(true);
367 clusAlgorithm->generateTree(phyloTree, quickDistMat.get(), &info,
368 clustalPhyTreeFile.getPtrToFile());
371 ptrToClustalFile = clustalPhyTreeFile.getPtrToFile();
373 promptForBoolSeedAndNumTrials();
375 RandomGenerator randGenerator(userParameters->getBootRanSeed());
378 * Print bootstrap information to top of clustal bootstrap file!
380 if (userParameters->getOutputTreeClustal())
382 printBootstrapHeaderToClustalFile(ptrToClustalFile);
385 verbose = false; // Turn OFF screen output
386 clusAlgorithm->setVerbose(false);
388 sampleTree.treeDesc.resize(numSeqs + 1, vector<int>(numSeqs + 1));
390 if (userParameters->getMenuFlag())
392 cout << "\n\nEach dot represents 10 trials\n\n";
397 int lenSeq1 = alignPtr->getSeqLength(1);
398 for (i = 1; i <= userParameters->getBootNumTrials(); ++i)
400 for (j = 1; j <= alignPtr->getSeqLength(firstSeq); ++j)
402 // select alignment positions for
403 ranno = randGenerator.addRand((unsigned long)lenSeq1) + 1;
404 bootPositions[j] = ranno; // bootstrap sample
407 overspill = calcQuickDistMatForSubSet(clustalPhyTreeFile.getPtrToFile(),
408 phylipPhyTreeFile.getPtrToFile(), nexusPhyTreeFile.getPtrToFile(), alignPtr,
413 totalOverspill = totalOverspill + overspill;
419 if (userParameters->getOutputTreeClustal() ||
420 userParameters->getOutputTreePhylip() || userParameters->getOutputTreeNexus())
422 // NOTE this is bad to pass in clustalPhyTreeFile
423 clusAlgorithm->generateTree(&sampleTree, quickDistMat.get(), &info,
424 clustalPhyTreeFile.getPtrToFile());
427 sampleTree.leftBranch.clear();
428 sampleTree.rightBranch.clear();
430 compareTree(phyloTree, &sampleTree, &bootTotals, lastSeq - firstSeq + 1);
432 if (userParameters->getMenuFlag())
445 // check if any distances overflowed the distance corrections
448 totalDists = (numSeqs *(numSeqs - 1)) / 2;
449 printErrorMessageForBootstrap(totalOverspill, totalDists, nfails);
452 bootPositions.clear();
454 outputTree = new ClusterTreeOutput(&info, userParameters->getBootstrapFormat());
457 * Print ClustalTree with bootTotals
459 if (userParameters->getOutputTreeClustal())
461 outputTree->printTree(phyloTree, clustalPhyTreeFile.getPtrToFile(), &bootTotals);
465 * Print phylip tree with boottotals.
467 if (userParameters->getOutputTreePhylip())
469 // Save the old tree!
470 saveTree.treeDesc.resize(numSeqs + 1, vector<int>(numSeqs + 1));
471 saveTree.treeDesc.assign(phyloTree->treeDesc.begin(), phyloTree->treeDesc.end());
473 outputTree->printPhylipTree(phyloTree, phylipPhyTreeFile.getPtrToFile(),
474 alignPtr, quickDistMat.get(), &bootTotals);
475 // reassign the old values!
476 phyloTree->treeDesc.assign(saveTree.treeDesc.begin(), saveTree.treeDesc.end());
480 * print nexus tree with boottotals
482 if (userParameters->getOutputTreeNexus())
484 outputTree->printNexusTree(phyloTree, nexusPhyTreeFile.getPtrToFile(), alignPtr,
485 quickDistMat.get(), &bootTotals);
489 delete clusAlgorithm;
492 catch(const exception &ex)