/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include "UnRootedClusterTree.h" #include "../general/utils.h" #include "RandomGenerator.h" #include #include #include "../general/OutputFile.h" namespace clustalw { UnRootedClusterTree::UnRootedClusterTree() { } /* * The function treeFromAlignment is called to generate a tree from an alignment * without a distance matrix. It calculates the distance matrix from the alignment * quickly. */ // Note this function was called phylogenetic_tree before void UnRootedClusterTree::treeFromAlignment(TreeNames* treeNames, Alignment *alignPtr) { try { OutputFile phylipPhyTreeFile; OutputFile clustalPhyTreeFile; OutputFile distancesPhyTreeFile; OutputFile nexusPhyTreeFile; OutputFile pimFile; string path; int i, j; int overspill = 0; int totalDists; numSeqs = alignPtr->getNumSeqs(); // NOTE class variable /** * Check if numSeqs is ok */ if(!checkIfConditionsMet(numSeqs, 2)) { return; } firstSeq = 1; lastSeq = numSeqs; phyloTree = new PhyloTree; // The SeqInfo struct is passed to reduce the number of parameters passed! SeqInfo info; info.firstSeq = firstSeq; info.lastSeq = lastSeq; info.numSeqs = numSeqs; outputTree = new ClusterTreeOutput(&info, 0); // No bootstrap! phyloTree->treeDesc.resize(numSeqs + 1, vector(numSeqs + 1)); TreeGroups saveTree(numSeqs + 1, vector(numSeqs + 1)); // NOTE at the moment there is only one type of clustering algorithm, but there will // be more! clusAlgorithm = new NJTree(); utilityObject->getPath(userParameters->getSeqName(), &path); /** * Open the required output files. */ if(!openFilesForTreeFromAlignment(&clustalPhyTreeFile, &phylipPhyTreeFile, &distancesPhyTreeFile, &nexusPhyTreeFile, &pimFile, treeNames, &path)) { return; // Problem opeing one of the files, cannot continue! } int _lenFirstSeq = alignPtr->getSeqLength(firstSeq); bootPositions.resize(_lenFirstSeq + 2); for (j = 1; j <= _lenFirstSeq; ++j) { bootPositions[j] = j; } /** * Calculate quickDist and overspill */ overspill = calcQuickDistMatForAll(clustalPhyTreeFile.getPtrToFile(), phylipPhyTreeFile.getPtrToFile(), nexusPhyTreeFile.getPtrToFile(), pimFile.getPtrToFile(), distancesPhyTreeFile.getPtrToFile(), alignPtr); // check if any distances overflowed the distance corrections if (overspill > 0) { totalDists = (numSeqs *(numSeqs - 1)) / 2; overspillMessage(overspill, totalDists); } if (userParameters->getOutputTreeClustal()) { verbose = true; } // Turn on file output if (userParameters->getOutputTreeClustal() || userParameters->getOutputTreePhylip() || userParameters->getOutputTreeNexus()) { clusAlgorithm->setVerbose(true); clusAlgorithm->generateTree(phyloTree, quickDistMat.get(), &info, clustalPhyTreeFile.getPtrToFile()); clusAlgorithm->setVerbose(false); } for (i = 1; i < numSeqs + 1; i++) for (j = 1; j < numSeqs + 1; j++) { saveTree[i][j] = phyloTree->treeDesc[i][j]; } if (userParameters->getOutputTreePhylip()) { outputTree->printPhylipTree(phyloTree, phylipPhyTreeFile.getPtrToFile(), alignPtr, quickDistMat.get(), &bootTotals); } for (i = 1; i < numSeqs + 1; i++) for (j = 1; j < numSeqs + 1; j++) { phyloTree->treeDesc[i][j] = saveTree[i][j]; } if (userParameters->getOutputTreeNexus()) { outputTree->printNexusTree(phyloTree, nexusPhyTreeFile.getPtrToFile(), alignPtr, quickDistMat.get(), &bootTotals); } /** Free up resources!!!!! */ treeGaps.clear(); bootPositions.clear(); delete clusAlgorithm; delete phyloTree; delete outputTree; } catch(const exception& ex) { cerr << ex.what() << endl; utilityObject->error("Terminating program. Cannot continue\n"); exit(1); } } /* * Routine for producing unrooted NJ trees from seperately aligned * pairwise distances. This produces the GUIDE DENDROGRAMS in * PHYLIP format. */ void UnRootedClusterTree::treeFromDistMatrix(DistMatrix* distMat, Alignment *alignPtr, int seq1, int nSeqs, string& phylipName) { OutputFile phylipPhyTreeFile; try { // Test to see if the inputs are valid if(seq1 < 1 || nSeqs < 1) { cerr << "Invalid inputs into treeFromDistMatrix \n" << "seq1 = " << seq1 << " nSeqs = " << nSeqs << "\n" << "Need to end program!\n"; exit(1); return; } PhyloTree phyloTree; float dist; string path; verbose = false; firstSeq = seq1; lastSeq = firstSeq + nSeqs - 1; SeqInfo info; info.firstSeq = firstSeq; info.lastSeq = lastSeq; info.numSeqs = nSeqs; // Open up the phylipPhyTreeFile now! // NOTE that this is not exactly correct. This may cause a problem when outputing // a tree for each of the profiles. But then we can pass it in, maybe. utilityObject->getPath(userParameters->getSeqName(), &path); if(nSeqs >= 2) { string name = phylipName; if(!phylipPhyTreeFile.openFile(&name, "\nEnter name for new GUIDE TREE file ", &path, "dnd", "Guide tree")) { return; } phylipName = name; } else { return; } // Not sure about bootstrapping here! clusAlgorithm = new NJTree(); outputTree = new ClusterTreeOutput(&info, 0); ofstream* ptrToFile = phylipPhyTreeFile.getPtrToFile(); if (nSeqs == 2) { dist = (*distMat)(firstSeq, firstSeq + 1) / 2.0; if(ptrToFile->is_open()) { (*ptrToFile) << "(" << alignPtr->getName(firstSeq) << ":" << setprecision(5) << dist << "," << alignPtr->getName(firstSeq + 1) << ":" << setprecision(5) << dist <<");\n"; } } else { int dimensions = lastSeq - firstSeq + 2; phyloTree.treeDesc.resize(dimensions, vector(dimensions)); /* debugging, also used when -OUTPUTTREE is used */ #if 0 ofstream debuglog("debug.treeFromDistMatrix.txt", ios::out); clusAlgorithm->setVerbose(true); clusAlgorithm->generateTree(&phyloTree, distMat, &info, &debuglog); #else clusAlgorithm->generateTree(&phyloTree, distMat, &info); #endif outputTree->printPhylipTree(&phyloTree, ptrToFile, alignPtr, distMat, &bootTotals); } delete clusAlgorithm; delete outputTree; //phylipPhyTreeFile.close(); } catch(const exception &ex) { cerr << "ERROR: Error has occured in treeFromDistMatrix. " << "Need to terminate program.\n" << ex.what(); exit(1); } catch(...) { cerr << "ERROR: Error has occured in treeFromDistMatrix. " << "Need to terminate program.\n"; exit(1); } } /* * Note before I had this function was accepting a distance matrix. But I think it * just uses the quickly generated one. So it doesnt need it anymore. But be careful !!!! */ void UnRootedClusterTree::bootstrapTree(TreeNames* treeNames, Alignment *alignPtr) { int i, j; int ranno; string path; OutputFile clustalPhyTreeFile; ofstream* ptrToClustalFile; OutputFile phylipPhyTreeFile; OutputFile nexusPhyTreeFile; try { phyloTree = new PhyloTree; PhyloTree sampleTree; PhyloTree standardTree; PhyloTree saveTree; int totalDists, overspill = 0, totalOverspill = 0; int nfails = 0; numSeqs = alignPtr->getNumSeqs(); firstSeq = 1; lastSeq = numSeqs; clusAlgorithm = new NJTree(); SeqInfo info; info.firstSeq = firstSeq; info.lastSeq = lastSeq; info.numSeqs = numSeqs; /** * Check if numSeqs is ok */ if(!checkIfConditionsMet(numSeqs, 4)) { return; } if (!userParameters->getOutputTreeClustal() && !userParameters->getOutputTreePhylip() && !userParameters->getOutputTreeNexus()) { utilityObject->error("You must select either clustal or phylip or nexus tree output format"); return; } utilityObject->getPath(userParameters->getSeqName(), &path); if(!openFilesForBootstrap(&clustalPhyTreeFile, &phylipPhyTreeFile, &nexusPhyTreeFile, treeNames, &path)) { return; // There was a problem opening the output files. } int _lenFirstSeq = alignPtr->getSeqLength(firstSeq); bootTotals.clear(); bootTotals.resize(numSeqs + 1); bootPositions.clear(); bootPositions.resize(_lenFirstSeq + 2); for (j = 1; j <= _lenFirstSeq; ++j) // First select all positions for { bootPositions[j] = j; } // the "standard" tree overspill = calcQuickDistMatForSubSet(clustalPhyTreeFile.getPtrToFile(), phylipPhyTreeFile.getPtrToFile(), nexusPhyTreeFile.getPtrToFile(), alignPtr); // check if any distances overflowed the distance corrections if (overspill > 0) { totalDists = (numSeqs *(numSeqs - 1)) / 2; overspillMessage(overspill, totalDists); } treeGaps.clear(); if (userParameters->getOutputTreeClustal()) { verbose = true; } // Turn on screen output phyloTree->treeDesc.resize(numSeqs + 1, vector(numSeqs + 1)); // compute the standard tree if (userParameters->getOutputTreeClustal() || userParameters->getOutputTreePhylip() || userParameters->getOutputTreeNexus()) { clusAlgorithm->setVerbose(true); clusAlgorithm->generateTree(phyloTree, quickDistMat.get(), &info, clustalPhyTreeFile.getPtrToFile()); } ptrToClustalFile = clustalPhyTreeFile.getPtrToFile(); promptForBoolSeedAndNumTrials(); RandomGenerator randGenerator(userParameters->getBootRanSeed()); /** * Print bootstrap information to top of clustal bootstrap file! */ if (userParameters->getOutputTreeClustal()) { printBootstrapHeaderToClustalFile(ptrToClustalFile); } verbose = false; // Turn OFF screen output clusAlgorithm->setVerbose(false); sampleTree.treeDesc.resize(numSeqs + 1, vector(numSeqs + 1)); if (userParameters->getMenuFlag()) { cout << "\n\nEach dot represents 10 trials\n\n"; } totalOverspill = 0; nfails = 0; int lenSeq1 = alignPtr->getSeqLength(1); for (i = 1; i <= userParameters->getBootNumTrials(); ++i) { for (j = 1; j <= alignPtr->getSeqLength(firstSeq); ++j) { // select alignment positions for ranno = randGenerator.addRand((unsigned long)lenSeq1) + 1; bootPositions[j] = ranno; // bootstrap sample } overspill = calcQuickDistMatForSubSet(clustalPhyTreeFile.getPtrToFile(), phylipPhyTreeFile.getPtrToFile(), nexusPhyTreeFile.getPtrToFile(), alignPtr, true); if (overspill > 0) { totalOverspill = totalOverspill + overspill; nfails++; } treeGaps.clear(); if (userParameters->getOutputTreeClustal() || userParameters->getOutputTreePhylip() || userParameters->getOutputTreeNexus()) { // NOTE this is bad to pass in clustalPhyTreeFile clusAlgorithm->generateTree(&sampleTree, quickDistMat.get(), &info, clustalPhyTreeFile.getPtrToFile()); } sampleTree.leftBranch.clear(); sampleTree.rightBranch.clear(); compareTree(phyloTree, &sampleTree, &bootTotals, lastSeq - firstSeq + 1); if (userParameters->getMenuFlag()) { if (i % 10 == 0) { cout << "."; } if (i % 100 == 0) { cout << "\n"; } } } // check if any distances overflowed the distance corrections if (nfails > 0) { totalDists = (numSeqs *(numSeqs - 1)) / 2; printErrorMessageForBootstrap(totalOverspill, totalDists, nfails); } bootPositions.clear(); outputTree = new ClusterTreeOutput(&info, userParameters->getBootstrapFormat()); /** * Print ClustalTree with bootTotals */ if (userParameters->getOutputTreeClustal()) { outputTree->printTree(phyloTree, clustalPhyTreeFile.getPtrToFile(), &bootTotals); } /** * Print phylip tree with boottotals. */ if (userParameters->getOutputTreePhylip()) { // Save the old tree! saveTree.treeDesc.resize(numSeqs + 1, vector(numSeqs + 1)); saveTree.treeDesc.assign(phyloTree->treeDesc.begin(), phyloTree->treeDesc.end()); outputTree->printPhylipTree(phyloTree, phylipPhyTreeFile.getPtrToFile(), alignPtr, quickDistMat.get(), &bootTotals); // reassign the old values! phyloTree->treeDesc.assign(saveTree.treeDesc.begin(), saveTree.treeDesc.end()); } /** * print nexus tree with boottotals */ if (userParameters->getOutputTreeNexus()) { outputTree->printNexusTree(phyloTree, nexusPhyTreeFile.getPtrToFile(), alignPtr, quickDistMat.get(), &bootTotals); } delete phyloTree; delete clusAlgorithm; delete outputTree; } catch(const exception &ex) { } } }