Next version of JABA
[jabaws.git] / binaries / src / clustalw / src / tree / UnRootedClusterTree.cpp
1 /**
2  * Author: Mark Larkin
3  * 
4  * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson.  
5  */
6 #ifdef HAVE_CONFIG_H
7     #include "config.h"
8 #endif
9 #include "UnRootedClusterTree.h"
10 #include "../general/utils.h"
11 #include "RandomGenerator.h"
12 #include <math.h>
13 #include <sstream>
14 #include "../general/OutputFile.h"
15
16 namespace clustalw
17 {
18 UnRootedClusterTree::UnRootedClusterTree()
19 {
20
21 }
22 /*
23  * The function treeFromAlignment is called to generate a tree from an alignment
24  * without a distance matrix. It calculates the distance matrix from the alignment
25  * quickly.
26  */
27  
28 // Note this function was called phylogenetic_tree before
29 void UnRootedClusterTree::treeFromAlignment(TreeNames* treeNames, Alignment *alignPtr)
30 {
31     try
32     {
33         OutputFile phylipPhyTreeFile;   
34         OutputFile clustalPhyTreeFile;
35         OutputFile distancesPhyTreeFile;
36         OutputFile nexusPhyTreeFile;
37         OutputFile pimFile;
38     
39         string path;
40         int i, j;
41         int overspill = 0;
42         int totalDists;
43         numSeqs = alignPtr->getNumSeqs(); // NOTE class variable
44         
45         /**
46          * Check if numSeqs is ok
47          */
48         if(!checkIfConditionsMet(numSeqs, 2))
49         {
50             return;
51         }
52               
53         firstSeq = 1;
54         lastSeq = numSeqs;
55         phyloTree = new PhyloTree;
56         
57         // The SeqInfo struct is passed to reduce the number of parameters passed!
58         SeqInfo info;
59         info.firstSeq = firstSeq;
60         info.lastSeq = lastSeq;
61         info.numSeqs = numSeqs;
62     
63         outputTree = new ClusterTreeOutput(&info, 0); // No bootstrap!
64     
65         phyloTree->treeDesc.resize(numSeqs + 1, vector<int>(numSeqs + 1));
66     
67         TreeGroups saveTree(numSeqs + 1, vector<int>(numSeqs + 1));
68     
69         // NOTE at the moment there is only one type of clustering algorithm, but there will
70         // be more!
71         clusAlgorithm = new NJTree();
72
73         utilityObject->getPath(userParameters->getSeqName(), &path);
74
75         /**
76          * Open the required output files.
77          */
78         if(!openFilesForTreeFromAlignment(&clustalPhyTreeFile, &phylipPhyTreeFile, 
79                         &distancesPhyTreeFile, &nexusPhyTreeFile, &pimFile, treeNames, &path))
80         {
81             return; // Problem opeing one of the files, cannot continue!
82         } 
83     
84         int _lenFirstSeq = alignPtr->getSeqLength(firstSeq);
85     
86         bootPositions.resize(_lenFirstSeq + 2);
87
88         for (j = 1; j <= _lenFirstSeq; ++j)
89         {
90             bootPositions[j] = j;
91         }
92
93         /**
94          * Calculate quickDist and overspill
95          */
96         overspill = calcQuickDistMatForAll(clustalPhyTreeFile.getPtrToFile(),
97                        phylipPhyTreeFile.getPtrToFile(), nexusPhyTreeFile.getPtrToFile(),
98                        pimFile.getPtrToFile(), distancesPhyTreeFile.getPtrToFile(), alignPtr);
99
100         // check if any distances overflowed the distance corrections 
101         if (overspill > 0)
102         {
103             totalDists = (numSeqs *(numSeqs - 1)) / 2;
104             overspillMessage(overspill, totalDists);
105         }
106
107         if (userParameters->getOutputTreeClustal())
108         {
109             verbose = true;
110         }
111         // Turn on file output 
112     
113
114         if (userParameters->getOutputTreeClustal() || userParameters->getOutputTreePhylip() 
115             || userParameters->getOutputTreeNexus())
116         {
117             
118             clusAlgorithm->setVerbose(true);
119             clusAlgorithm->generateTree(phyloTree, quickDistMat.get(), &info,
120                                         clustalPhyTreeFile.getPtrToFile());
121             clusAlgorithm->setVerbose(false);                            
122         }
123
124         for (i = 1; i < numSeqs + 1; i++)
125             for (j = 1; j < numSeqs + 1; j++)
126             {
127                 saveTree[i][j] = phyloTree->treeDesc[i][j];
128             }
129
130         if (userParameters->getOutputTreePhylip())
131         {
132             outputTree->printPhylipTree(phyloTree, phylipPhyTreeFile.getPtrToFile(), alignPtr,
133                                         quickDistMat.get(), &bootTotals);
134         }
135
136         for (i = 1; i < numSeqs + 1; i++)
137             for (j = 1; j < numSeqs + 1; j++)
138             {
139                 phyloTree->treeDesc[i][j] = saveTree[i][j];
140             }
141
142         if (userParameters->getOutputTreeNexus())
143         {
144             outputTree->printNexusTree(phyloTree, nexusPhyTreeFile.getPtrToFile(), alignPtr, 
145                                        quickDistMat.get(), &bootTotals);
146         }
147     
148         /** Free up resources!!!!! */
149     
150         treeGaps.clear();
151         bootPositions.clear();
152           
153         delete clusAlgorithm;
154         delete phyloTree;
155         delete outputTree;
156     }
157     catch(const exception& ex)
158     {
159         cerr << ex.what() << endl;
160         utilityObject->error("Terminating program. Cannot continue\n");
161         exit(1);    
162     }
163 }
164
165 /*
166  * Routine for producing unrooted NJ trees from seperately aligned
167  * pairwise distances.  This produces the GUIDE DENDROGRAMS in
168  * PHYLIP format.
169  */
170 void UnRootedClusterTree::treeFromDistMatrix(DistMatrix* distMat, 
171                                 Alignment *alignPtr, int seq1, int nSeqs,
172                                  string& phylipName)
173 {
174     OutputFile phylipPhyTreeFile;
175     
176     try
177     {
178         // Test to see if the inputs are valid
179         if(seq1 < 1 || nSeqs < 1)
180         {
181             cerr << "Invalid inputs into treeFromDistMatrix \n"
182                  << "seq1 = " << seq1 << " nSeqs = " << nSeqs << "\n"
183                  << "Need to end program!\n";
184             exit(1); 
185             return;
186         }
187         PhyloTree phyloTree;
188         float dist;
189         string path;
190         verbose = false;
191         firstSeq = seq1;
192         lastSeq = firstSeq + nSeqs - 1;
193     
194         SeqInfo info;
195         info.firstSeq = firstSeq;
196         info.lastSeq = lastSeq;
197         info.numSeqs = nSeqs;
198         // Open up the phylipPhyTreeFile now!
199
200         // NOTE that this is not exactly correct. This may cause a problem when outputing
201         // a tree for each of the profiles. But then we can pass it in, maybe.
202         utilityObject->getPath(userParameters->getSeqName(), &path);
203         
204         if(nSeqs >= 2)
205         {
206             string name = phylipName;
207             if(!phylipPhyTreeFile.openFile(&name, 
208                              "\nEnter name for new GUIDE TREE           file  ", &path, "dnd",
209                              "Guide tree"))
210             {
211                 return;
212             }
213             phylipName = name;                    
214         }
215         else
216         {
217             return;
218         }
219                 
220         // Not sure about bootstrapping here!
221         clusAlgorithm = new NJTree();
222         outputTree = new ClusterTreeOutput(&info, 0);
223         
224         ofstream* ptrToFile = phylipPhyTreeFile.getPtrToFile();
225         
226         if (nSeqs == 2)
227         {
228             dist = (*distMat)(firstSeq, firstSeq + 1) / 2.0;
229             if(ptrToFile->is_open())
230             {
231                 (*ptrToFile) <<  "(" << alignPtr->getName(firstSeq) << ":" 
232                              << setprecision(5)
233                              << dist << "," << alignPtr->getName(firstSeq + 1) << ":" 
234                              << setprecision(5) << dist <<");\n";
235             }
236         }
237         else
238         {
239             int dimensions = lastSeq - firstSeq + 2;
240             phyloTree.treeDesc.resize(dimensions, vector<int>(dimensions));
241 /* debugging, also used when -OUTPUTTREE is used */
242 #if 0
243             ofstream debuglog("debug.treeFromDistMatrix.txt", ios::out);
244             clusAlgorithm->setVerbose(true);
245             clusAlgorithm->generateTree(&phyloTree, distMat, &info, &debuglog);
246 #else
247             clusAlgorithm->generateTree(&phyloTree, distMat, &info);
248 #endif            
249             
250             outputTree->printPhylipTree(&phyloTree, ptrToFile, alignPtr,
251                                         distMat, &bootTotals);
252         }
253         delete clusAlgorithm;
254         delete outputTree;
255     
256         //phylipPhyTreeFile.close();
257     }
258     catch(const exception &ex)
259     {
260         cerr << "ERROR: Error has occured in treeFromDistMatrix. " 
261              << "Need to terminate program.\n"
262              << ex.what();
263         exit(1);   
264     }
265     catch(...)
266     {
267         cerr << "ERROR: Error has occured in treeFromDistMatrix. " 
268              << "Need to terminate program.\n";      
269         exit(1);  
270     }
271 }
272
273 /*
274  * Note before I had this function was accepting a distance matrix. But I think it
275  * just uses the quickly generated one. So it doesnt need it anymore. But be careful !!!!
276  */
277 void UnRootedClusterTree::bootstrapTree(TreeNames* treeNames, Alignment *alignPtr)
278 {
279     int i, j;
280     int ranno;
281     string path;
282     
283     OutputFile clustalPhyTreeFile;
284     ofstream* ptrToClustalFile; 
285     OutputFile phylipPhyTreeFile;
286     OutputFile nexusPhyTreeFile;
287     
288     try
289     {
290         phyloTree = new PhyloTree; 
291         PhyloTree sampleTree;
292         PhyloTree standardTree;
293         PhyloTree saveTree;
294         int totalDists, overspill = 0, totalOverspill = 0;
295         int nfails = 0;
296         numSeqs = alignPtr->getNumSeqs(); 
297         firstSeq = 1;
298         lastSeq = numSeqs;
299         clusAlgorithm = new NJTree(); 
300
301         SeqInfo info;
302         info.firstSeq = firstSeq;
303         info.lastSeq = lastSeq;
304         info.numSeqs = numSeqs;    
305     
306         /**
307          * Check if numSeqs is ok
308          */
309         if(!checkIfConditionsMet(numSeqs, 4))
310         {
311             return;
312         } 
313
314         if (!userParameters->getOutputTreeClustal() && !userParameters->getOutputTreePhylip()
315             && !userParameters->getOutputTreeNexus())
316         {
317             utilityObject->error("You must select either clustal or phylip or nexus tree output format");
318             return;
319         }
320         
321         utilityObject->getPath(userParameters->getSeqName(), &path);
322
323         if(!openFilesForBootstrap(&clustalPhyTreeFile, &phylipPhyTreeFile, &nexusPhyTreeFile,
324                                   treeNames, &path))
325         {
326             return; // There was a problem opening the output files.
327         }       
328     
329         int _lenFirstSeq = alignPtr->getSeqLength(firstSeq);
330         bootTotals.clear();
331         bootTotals.resize(numSeqs + 1);
332         bootPositions.clear();
333         bootPositions.resize(_lenFirstSeq + 2);
334
335         for (j = 1; j <= _lenFirstSeq; ++j)
336         // First select all positions for
337         {
338             bootPositions[j] = j;
339         }
340         
341         // the "standard" tree
342         overspill = calcQuickDistMatForSubSet(clustalPhyTreeFile.getPtrToFile(),
343                 phylipPhyTreeFile.getPtrToFile(), nexusPhyTreeFile.getPtrToFile(), alignPtr);
344
345         // check if any distances overflowed the distance corrections
346         if (overspill > 0)
347         {
348             totalDists = (numSeqs *(numSeqs - 1)) / 2;
349             overspillMessage(overspill, totalDists);
350         }
351
352         treeGaps.clear();
353     
354         if (userParameters->getOutputTreeClustal())
355         {
356             verbose = true;
357         }
358         // Turn on screen output
359         phyloTree->treeDesc.resize(numSeqs + 1, vector<int>(numSeqs + 1));
360     
361         // compute the standard tree 
362
363         if (userParameters->getOutputTreeClustal() || userParameters->getOutputTreePhylip() ||
364             userParameters->getOutputTreeNexus())
365         {
366             clusAlgorithm->setVerbose(true); 
367             clusAlgorithm->generateTree(phyloTree, quickDistMat.get(), &info,
368                                         clustalPhyTreeFile.getPtrToFile());
369         }
370
371         ptrToClustalFile = clustalPhyTreeFile.getPtrToFile();
372
373         promptForBoolSeedAndNumTrials();
374
375         RandomGenerator randGenerator(userParameters->getBootRanSeed());
376         
377         /**
378          * Print bootstrap information to top of clustal bootstrap file!
379          */
380         if (userParameters->getOutputTreeClustal())
381         {
382             printBootstrapHeaderToClustalFile(ptrToClustalFile);
383         }
384
385         verbose = false; // Turn OFF screen output
386         clusAlgorithm->setVerbose(false);
387         
388         sampleTree.treeDesc.resize(numSeqs + 1, vector<int>(numSeqs + 1));
389     
390         if (userParameters->getMenuFlag())
391         {
392             cout <<  "\n\nEach dot represents 10 trials\n\n";
393         }
394     
395         totalOverspill = 0;
396         nfails = 0;
397         int lenSeq1 = alignPtr->getSeqLength(1);
398         for (i = 1; i <= userParameters->getBootNumTrials(); ++i)
399         {
400             for (j = 1; j <= alignPtr->getSeqLength(firstSeq); ++j)
401             {
402                 // select alignment positions for            
403                 ranno = randGenerator.addRand((unsigned long)lenSeq1) + 1;
404                 bootPositions[j] = ranno; // bootstrap sample 
405             }
406             
407             overspill = calcQuickDistMatForSubSet(clustalPhyTreeFile.getPtrToFile(),
408                 phylipPhyTreeFile.getPtrToFile(), nexusPhyTreeFile.getPtrToFile(), alignPtr,
409                 true);
410             
411             if (overspill > 0)
412             {
413                 totalOverspill = totalOverspill + overspill;
414                 nfails++;
415             }
416
417             treeGaps.clear();
418         
419             if (userParameters->getOutputTreeClustal() ||
420                 userParameters->getOutputTreePhylip() || userParameters->getOutputTreeNexus())
421             {
422                 // NOTE this is bad to pass in clustalPhyTreeFile
423                 clusAlgorithm->generateTree(&sampleTree, quickDistMat.get(), &info,
424                                             clustalPhyTreeFile.getPtrToFile());
425             }
426
427             sampleTree.leftBranch.clear();
428             sampleTree.rightBranch.clear();
429
430             compareTree(phyloTree, &sampleTree, &bootTotals, lastSeq - firstSeq + 1);
431         
432             if (userParameters->getMenuFlag())
433             {
434                 if (i % 10 == 0)
435                 {
436                     cout <<  ".";
437                 }
438                 if (i % 100 == 0)
439                 {
440                     cout << "\n";
441                 }
442             }
443         }
444
445         // check if any distances overflowed the distance corrections 
446         if (nfails > 0)
447         {
448             totalDists = (numSeqs *(numSeqs - 1)) / 2;
449             printErrorMessageForBootstrap(totalOverspill, totalDists, nfails);
450         }
451
452         bootPositions.clear();
453
454         outputTree = new ClusterTreeOutput(&info, userParameters->getBootstrapFormat());
455
456         /**
457          * Print ClustalTree with bootTotals
458          */
459         if (userParameters->getOutputTreeClustal())
460         {
461             outputTree->printTree(phyloTree, clustalPhyTreeFile.getPtrToFile(), &bootTotals);
462         }
463     
464         /**
465          * Print phylip tree with boottotals.
466          */
467         if (userParameters->getOutputTreePhylip())
468         {
469             // Save the old tree!
470             saveTree.treeDesc.resize(numSeqs + 1, vector<int>(numSeqs + 1));
471             saveTree.treeDesc.assign(phyloTree->treeDesc.begin(), phyloTree->treeDesc.end());
472         
473             outputTree->printPhylipTree(phyloTree, phylipPhyTreeFile.getPtrToFile(),
474                                         alignPtr, quickDistMat.get(), &bootTotals);
475             // reassign the old values!                            
476             phyloTree->treeDesc.assign(saveTree.treeDesc.begin(), saveTree.treeDesc.end());
477         }
478
479         /**
480          * print nexus tree with boottotals
481          */
482         if (userParameters->getOutputTreeNexus())
483         {
484             outputTree->printNexusTree(phyloTree, nexusPhyTreeFile.getPtrToFile(), alignPtr, 
485                                        quickDistMat.get(), &bootTotals);
486         }
487     
488         delete phyloTree;
489         delete clusAlgorithm;
490         delete outputTree;
491     }
492     catch(const exception &ex)
493     {
494
495     }
496 }
497
498 }