Mac binaries
[jabaws.git] / website / archive / binaries / mac / src / clustalw / src / tree / TreeInterface.cpp
1 /**
2  * Author: Mark Larkin
3  * 
4  * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson.  
5  */
6 /**
7  * Changes:
8  * Mark 10-5-2007: Bug fix # 42. Added getWeightsForQtLowScore function.
9  * Mark 22-5-2007: Made a change to getWeightsForQtLowScore
10  * Mark 23-5-2007: Made a change to getWeightsForQtLowScore
11  */
12 #ifdef HAVE_CONFIG_H
13     #include "config.h"
14 #endif
15 #include "TreeInterface.h"
16 #include <sstream>
17 #include "UnRootedClusterTree.h"
18 #include "Tree.h"
19 #include "../multipleAlign/MSA.h"
20 #include "../general/userparams.h"
21 #include "../general/debuglogObject.h"
22 #include "UPGMA/RootedGuideTree.h"
23 #include "UPGMA/RootedClusterTree.h"
24 namespace clustalw
25 {
26
27 /**
28  * This will be called by align!
29  *
30  */
31 auto_ptr<AlignmentSteps>
32 TreeInterface::getWeightsAndStepsFromDistMat(vector<int>* seqWeights, DistMatrix* distMat, 
33                                              Alignment *alignPtr, int seq1, int nSeqs, 
34                                              string* phylipName, bool* success)
35 {
36     #if DEBUGFULL 
37         if(logObject && DEBUGLOG)
38         {
39             logObject->logMsg("In getWeightsAndStepsFromDistMat\n");
40         }
41     #endif    
42     if(userParameters->getClusterAlgorithm() == UPGMA)
43     {
44         return getWeightsAndStepsFromDistMatUPGMA(seqWeights, distMat, alignPtr, 
45                                                 seq1, nSeqs, phylipName, success);
46     }
47     else
48     {
49         return getWeightsAndStepsFromDistMatNJ(seqWeights, distMat, alignPtr, 
50                                                 seq1, nSeqs, phylipName, success);
51     }
52 }
53
54                                    
55 /**
56  * This will be called by sequencesAlignToProfile. This function will put the distMat into
57  * a similarity matrix.
58  */
59 void
60 TreeInterface::getWeightsFromDistMat(vector<int>* seqWeights, DistMatrix* distMat, 
61                                      Alignment *alignPtr, int seq1, int nSeqs, 
62                                      string* phylipName, bool* success)
63 {
64     if(userParameters->getClusterAlgorithm() == UPGMA)
65     {    
66         getWeightsFromDistMatUPGMA(seqWeights, distMat, alignPtr, seq1, nSeqs, phylipName,
67                                    success);
68     }
69     else
70     {
71         getWeightsFromDistMatNJ(seqWeights, distMat, alignPtr, seq1, nSeqs, phylipName,
72                                 success);    
73     }
74 }
75
76 /**
77  * This function will be called by profileAlign
78  */
79 void
80 TreeInterface::getWeightsForProfileAlign(Alignment* alignPtr, DistMatrix* distMat, 
81                                          string* p1TreeName, vector<int>* p1Weights, string* p2TreeName, 
82                                          vector<int>* p2Weights, int numSeqs, int profile1NumSeqs, bool useTree1, 
83                                          bool useTree2, bool* success)
84 {    
85     if(userParameters->getClusterAlgorithm() == UPGMA)
86     {    
87         getWeightsForProfileAlignUPGMA(alignPtr, distMat, p1TreeName, p1Weights, p2TreeName,
88                                 p2Weights, numSeqs, profile1NumSeqs, useTree1, useTree2,
89                                 success);
90     }
91     else
92     {
93         getWeightsForProfileAlignNJ(alignPtr, distMat, p1TreeName, p1Weights, p2TreeName,
94                                 p2Weights, numSeqs, profile1NumSeqs, useTree1, useTree2,
95                                 success);    
96     }                               
97 }                    
98
99 /**
100  * This function will be called by doAlignUseOldGuideTree
101  *
102  */                                   
103 auto_ptr<AlignmentSteps>
104 TreeInterface::getWeightsAndStepsFromTree(Alignment* alignPtr, 
105                                           DistMatrix* distMat, string* treeName,
106                                           vector<int>* seqWeights, int fSeq, 
107                                           int numSeqs, bool* success)         
108 {
109     /**
110      * This will only use the NJ. It will not use UPGMA
111      */
112     return getWeightsAndStepsFromTreeNJ(alignPtr, distMat, treeName, seqWeights, 
113                                         fSeq, numSeqs, success);
114 }
115
116 /**
117  * Called by sequencesAlignToProfile
118  */
119 int
120 TreeInterface::getWeightsFromGuideTree(Alignment* alignPtr, DistMatrix* distMat,
121                                        string* treeName, vector<int>* seqWeights, int fSeq,
122                                        int nSeqs, bool* success)
123 {
124     /**
125      * This will only use the NJ. It will not use UPGMA
126      */    
127     return getWeightsFromGuideTreeNJ(alignPtr, distMat, treeName, seqWeights, fSeq, nSeqs, 
128                                       success);
129 }
130  
131 /**
132  * This will be called by doGuideTreeOnly. This does not put the distMat into similarity.
133  *
134  */                                     
135 void
136 TreeInterface::generateTreeFromDistMat(DistMatrix* distMat, Alignment *alignPtr, 
137                                        int seq1, int nSeqs, 
138                                        string* phylipName, bool* success)
139 {
140     /**
141      * This function does not put distMat into similarities
142      */
143     if(userParameters->getClusterAlgorithm() == UPGMA)
144     { 
145         RootedGuideTree guideTree;
146         generateTreeFromDistMatUPGMA(&guideTree, distMat, alignPtr, seq1, nSeqs, 
147                                      phylipName, success);
148     }
149     else
150     {                                            
151         generateTreeFromDistMatNJ(distMat, alignPtr, seq1, nSeqs, phylipName, success);
152     }
153 }
154
155 void
156 TreeInterface::treeFromAlignment(TreeNames* treeNames, Alignment *alignPtr)
157 {
158     if(userParameters->getClusterAlgorithm() == UPGMA)
159     {
160         RootedClusterTree clusterTree;
161         clusterTree.treeFromAlignment(treeNames, alignPtr);
162     }
163     else
164     {
165         UnRootedClusterTree clusterTree;
166         clusterTree.treeFromAlignment(treeNames, alignPtr);
167     }
168 }                   
169  
170 void
171 TreeInterface::bootstrapTree(TreeNames* treeNames, Alignment *alignPtr)
172 {
173     /**
174      * NOTE We only do bootstrapping on the NJ tree.
175      */
176     UnRootedClusterTree clusterTree;
177     clusterTree.bootstrapTree(treeNames, alignPtr);
178 }
179
180
181 /**
182  * Private functions!!!!
183  *
184  */
185
186  
187 /** *******************
188  *      
189  *      Neighbour joining functions
190  */ 
191 /**
192  * Note: After this function has been called the distance matrix will all be in similarities.
193  *
194  */   
195 auto_ptr<AlignmentSteps>
196 TreeInterface::getWeightsAndStepsFromDistMatNJ(vector<int>* seqWeights, DistMatrix* distMat, 
197                                                Alignment *alignPtr, int seq1, int nSeqs, 
198                                                string* phylipName, bool* success)
199 {   
200     auto_ptr<AlignmentSteps> progSteps;   
201     generateTreeFromDistMatNJ(distMat, alignPtr, seq1, nSeqs, phylipName, success);
202         
203     progSteps = getWeightsAndStepsUseOldGuideTreeNJ(distMat, alignPtr, phylipName,
204                                                     seqWeights, seq1, nSeqs, success);
205     return progSteps;
206 }
207
208 auto_ptr<AlignmentSteps>
209 TreeInterface::getWeightsAndStepsUseOldGuideTreeNJ(DistMatrix* distMat, Alignment *alignPtr,
210                                                    string* treeName, vector<int>* seqWeights, 
211                                                    int fSeq, int nSeqs, bool* success)
212 {   
213     Tree groupTree; 
214     auto_ptr<AlignmentSteps> progSteps;
215     
216     if(nSeqs == 1)
217     {
218         utilityObject->info("Only 1 sequence, cannot do multiple alignment\n");
219         *success = false;
220         return progSteps;
221     }
222     
223     int status = 0;
224     
225     status = readTreeAndCalcWeightsNJ(&groupTree, alignPtr, distMat, treeName,
226                                           seqWeights, fSeq, nSeqs);
227                                      
228     if(status == 0)
229     {
230         *success = false;
231         return progSteps;
232     }
233     
234     progSteps = groupTree.createSets(0, nSeqs);
235     int _numSteps = progSteps->getNumSteps();
236     
237     utilityObject->info("There are %d groups", _numSteps);        
238     // clear the memory used for the phylogenetic tree
239
240     if (nSeqs >= 2)
241     {
242         groupTree.clearTree(NULL);
243     }
244                                           
245     *success = true;
246     return progSteps;   
247 }
248                                                                                               
249
250                                                                                 
251 /**
252  * The function readTreeAndCalcWeightsNJ is used to read in the tree given the treeName.
253  * It then calls the appropriate functions to calc the seqWeights and make sure the matrix 
254  * is in similarity mode. 
255  */                                                                                
256 int
257 TreeInterface::readTreeAndCalcWeightsNJ(Tree* groupTree, Alignment* alignPtr, 
258                                         DistMatrix* distMat, string* treeName,
259                                         vector<int>* seqWeights, int fSeq, int nSeqs)
260 {
261     int status = 0;
262     if (nSeqs >= 2)
263     {
264         status = groupTree->readTree(alignPtr, treeName->c_str(), fSeq - 1, nSeqs);
265         if (status == 0)
266         {
267             return status;
268         }
269     }
270
271     groupTree->calcSeqWeights(fSeq - 1, nSeqs, seqWeights);
272     
273     status = groupTree->calcSimilarities(alignPtr, distMat);
274     
275     return status;                                                            
276 }
277
278 int
279 TreeInterface::getWeightsFromGuideTreeNJ(Alignment* alignPtr, DistMatrix* distMat,
280                                          string* treeName, vector<int>* seqWeights, int fSeq, int nSeqs, 
281                                          bool* success)
282 {
283     Tree groupTree;
284     int status = readTreeAndCalcWeightsNJ(&groupTree, alignPtr, distMat, treeName,
285                                           seqWeights, fSeq, nSeqs);
286     if(status == 0)
287     {
288         *success = false;
289     }
290     else
291     {
292         *success = true;
293     }
294     return status;
295 }                                                                                             
296
297 void
298 TreeInterface::getWeightsFromDistMatNJ(vector<int>* seqWeights, DistMatrix* distMat, 
299                                        Alignment *alignPtr, int seq1, int _nSeqs, 
300                                        string* phylipName, bool* success)
301 {
302     string copyOfPhylipName = string(*phylipName);     
303     int nSeqs = _nSeqs;
304     int status = 0;
305     
306     generateTreeFromDistMatNJ(distMat, alignPtr, seq1, nSeqs, phylipName, success);
307     
308     Tree groupTree;
309     status = readTreeAndCalcWeightsNJ(&groupTree, alignPtr, distMat, phylipName,
310                                           seqWeights, seq1, nSeqs);
311     if(status == 0)
312     {
313         *success = false;
314     }
315     else
316     {
317         *success = true;
318     }
319 }
320
321 void
322 TreeInterface::getWeightsForProfileAlignNJ(Alignment* alignPtr, DistMatrix* distMat, 
323                                            string* p1TreeName, vector<int>* p1Weights, string* p2TreeName, 
324                                            vector<int>* p2Weights, int numSeqs, int profile1NumSeqs, bool useTree1, 
325                                            bool useTree2, bool* success)
326 {
327     if(!useTree1)
328     {
329         if (profile1NumSeqs >= 2) 
330         {        
331             generateTreeFromDistMatNJ(distMat, alignPtr, 1, profile1NumSeqs, p1TreeName,
332                                         success);
333         }    
334     }
335     
336     if(!useTree2)
337     {
338         if(numSeqs - profile1NumSeqs >= 2) 
339         {        
340             generateTreeFromDistMatNJ(distMat, alignPtr, profile1NumSeqs + 1, 
341                                       numSeqs - profile1NumSeqs, p2TreeName, success);
342         }  
343     }
344     
345     if (userParameters->getNewTree1File() || userParameters->getNewTree2File()) 
346     {
347         *success = false;
348         return;
349     }    
350     // MSA->CALCPAIRWISE
351     
352     MSA* msaObj = new MSA();
353         
354     int count = msaObj->calcPairwiseForProfileAlign(alignPtr, distMat);
355     
356     if (count == 0) 
357     {
358         *success = false;
359         return;
360     }
361     
362     Tree groupTree1, groupTree2;
363     int status = 0;
364         
365     if (profile1NumSeqs >= 2)
366     {
367         status = groupTree1.readTree(alignPtr, p1TreeName->c_str(), 0, profile1NumSeqs);
368         if (status == 0)
369         {
370             *success = false;
371             return;
372         }
373     }
374     
375     groupTree1.calcSeqWeights(0, profile1NumSeqs, p1Weights);
376     
377     if (profile1NumSeqs >= 2)
378     {
379         groupTree1.clearTree(NULL);
380     }
381     
382     if (numSeqs - profile1NumSeqs >= 2)
383     {
384         status = groupTree2.readTree(alignPtr, p2TreeName->c_str(), profile1NumSeqs, numSeqs);
385         if (status == 0)
386         {
387             *success = false;
388             return;
389         }
390     }
391     
392     groupTree2.calcSeqWeights(profile1NumSeqs, numSeqs, p2Weights);
393
394
395     /* clear the memory for the phylogenetic tree */
396
397     if (numSeqs - profile1NumSeqs >= 2)
398     {
399         groupTree2.clearTree(NULL);
400     }
401     
402     /**
403      * Convert distances to similarities!!!!!!
404      */
405     for (int i = 1; i < numSeqs; i++)
406     {
407         for (int j = i + 1; j <= numSeqs; j++)
408         {
409             (*distMat)(i, j) = 100.0 - (*distMat)(i, j) * 100.0;
410             (*distMat)(j, i) = (*distMat)(i, j);
411         }
412     }    
413       
414     *success = true;
415             
416 }
417
418 void
419 TreeInterface::generateTreeFromDistMatNJ(DistMatrix* distMat, Alignment *alignPtr, 
420                                          int seq1, int nSeqs, 
421                                          string* phylipName, bool* success)
422 {
423     string copyOfPhylipName = string(*phylipName);     
424     
425     if (nSeqs >= 2) 
426     {
427         UnRootedClusterTree* clusterTree = new UnRootedClusterTree;   
428         clusterTree->treeFromDistMatrix(distMat, alignPtr, seq1, nSeqs, copyOfPhylipName);
429                                         
430         *phylipName = copyOfPhylipName;
431         // AW: message outputted by OutputFile function
432         // utilityObject->info("Guide tree        file created:   [%s]",
433         //                              phylipName->c_str());        
434         delete clusterTree;
435     }
436     *success = true;
437 }
438
439 auto_ptr<AlignmentSteps>
440 TreeInterface::getWeightsAndStepsFromTreeNJ(Alignment* alignPtr, 
441                                             DistMatrix* distMat, string* treeName,
442                                             vector<int>* seqWeights, int fSeq, int numSeqs, 
443                                             bool* success)
444 {
445     auto_ptr<AlignmentSteps> progSteps;
446     Tree groupTree; 
447     if(numSeqs == 1)
448     {
449         utilityObject->info("Only 1 sequence, cannot do multiple alignment\n");
450         *success = false;
451         return progSteps;
452     }
453     int status;
454     status = readTreeAndCalcWeightsNJ(&groupTree, alignPtr, distMat, treeName, 
455                                       seqWeights, fSeq, numSeqs);
456     
457     if (status == 0)
458     {
459         *success = false;
460         return progSteps;
461     }
462     
463     progSteps = groupTree.createSets(0, numSeqs);
464     int _numSteps = progSteps->getNumSteps();
465     utilityObject->info("There are %d groups", _numSteps);        
466     // clear the memory used for the phylogenetic tree
467
468     if (numSeqs >= 2)
469     {
470         groupTree.clearTree(NULL);
471     }
472     
473     *success = true;            
474     
475     return progSteps;
476
477 }
478
479 /**
480  * UPGMA functions
481  *
482  */
483  
484  
485 auto_ptr<AlignmentSteps>
486 TreeInterface::getWeightsAndStepsFromDistMatUPGMA(vector<int>* seqWeights, 
487                                                   DistMatrix* distMat, Alignment *alignPtr, 
488                                                   int seq1, int nSeqs, string* phylipName, bool* success)
489 {
490     auto_ptr<AlignmentSteps> progSteps;
491     RootedGuideTree guideTree;   
492
493     progSteps = generateTreeFromDistMatUPGMA(&guideTree, distMat, alignPtr, seq1, nSeqs,
494                                              phylipName, success);
495
496     guideTree.calcSeqWeights(0, nSeqs, seqWeights);
497     
498     distMat->makeSimilarityMatrix();
499                                                     
500     return progSteps;
501 }                                                                                             
502
503 auto_ptr<AlignmentSteps> TreeInterface::generateTreeFromDistMatUPGMA(RootedGuideTree* guideTree, DistMatrix* distMat, Alignment *alignPtr, int seq1, int nSeqs, 
504                                             string* phylipName, bool* success)
505 {
506     auto_ptr<AlignmentSteps> progSteps;
507     string copyOfPhylipName = string(*phylipName);     
508     
509     if (nSeqs >= 2) 
510     {
511         RootedClusterTree clusterTree;   
512         progSteps = clusterTree.treeFromDistMatrix(guideTree, distMat, alignPtr, seq1,
513                                                    nSeqs, copyOfPhylipName);
514                                         
515         *phylipName = copyOfPhylipName;
516         // AW: message outputted by OutputFile function
517         // utilityObject->info("Guide tree        file created:   [%s]",
518         //                              phylipName->c_str());        
519     }
520     *success = true;
521     return progSteps;
522 }
523                                   
524 void TreeInterface::getWeightsFromDistMatUPGMA(vector<int>* seqWeights, DistMatrix* distMat, 
525                                    Alignment *alignPtr, int seq1, int nSeqs, 
526                                    string* phylipName, bool* success)
527 {
528     getWeightsAndStepsFromDistMatUPGMA(seqWeights, distMat, alignPtr, 
529                                  seq1, nSeqs, phylipName, success);
530 }
531
532 /**
533  * The function getWeightsForProfileAlignUPGMA is used to generate the sequence weights
534  * that will be used in a profile alignment. It also recalculates the distance matrix, and 
535  * then returns it as a similarity matrix. This function uses the NJ code to read in a tree
536  * from a file if we are using a previous tree. This is because that part of the code is able
537  * to do the finding of the root etc.
538  */
539 void TreeInterface::getWeightsForProfileAlignUPGMA(Alignment* alignPtr, DistMatrix* distMat, 
540                              string* p1TreeName, vector<int>* p1Weights, string* p2TreeName, 
541                     vector<int>* p2Weights, int numSeqs, int profile1NumSeqs, bool useTree1, 
542                     bool useTree2, bool* success)
543 {
544     int status = 0;
545     
546     if(useTree1)
547     {
548         // Use the code to read in the tree and get the seqWeights
549         Tree groupTree1;
550         if (profile1NumSeqs >= 2)
551         {
552             status = groupTree1.readTree(alignPtr, p1TreeName->c_str(), 0, profile1NumSeqs);
553             if (status == 0)
554             {
555                 *success = false;
556                 return;
557             }
558         }
559         groupTree1.calcSeqWeights(0, profile1NumSeqs, p1Weights);
560     
561         if (profile1NumSeqs >= 2)
562         {
563             groupTree1.clearTree(NULL);
564         }                
565     }
566     else
567     {
568         if (profile1NumSeqs >= 2) 
569         {        
570             RootedGuideTree guideTree;
571             generateTreeFromDistMatUPGMA(&guideTree, distMat, alignPtr, 1, profile1NumSeqs, 
572                                          p1TreeName, success);
573             guideTree.calcSeqWeights(0, profile1NumSeqs, p1Weights);
574         }                                      
575     }
576     
577     if(useTree2)
578     {
579         Tree groupTree2;
580         if (numSeqs - profile1NumSeqs >= 2)
581         {
582             status = groupTree2.readTree(alignPtr, p2TreeName->c_str(), 
583                                          profile1NumSeqs, numSeqs);
584             if (status == 0)
585             {
586                 *success = false;
587                 return;
588             }
589         }
590         groupTree2.calcSeqWeights(profile1NumSeqs, numSeqs, p2Weights);
591     
592         if (numSeqs - profile1NumSeqs >= 2)
593         {
594             groupTree2.clearTree(NULL);
595         }    
596     }
597     else
598     {
599         if(numSeqs - profile1NumSeqs >= 2) 
600         {        
601             RootedGuideTree guideTree;
602             generateTreeFromDistMatUPGMA(&guideTree, distMat, alignPtr, profile1NumSeqs + 1,
603                                          numSeqs - profile1NumSeqs, p2TreeName, success);
604             guideTree.calcSeqWeights(profile1NumSeqs, numSeqs, p2Weights);
605         }    
606     }
607     
608     if (userParameters->getNewTree1File() || userParameters->getNewTree2File()) 
609     {
610         *success = false;
611         return;
612     }    
613     
614     MSA* msaObj = new MSA();
615         
616     int count = msaObj->calcPairwiseForProfileAlign(alignPtr, distMat);
617     
618     delete msaObj;
619     
620     if (count == 0) 
621     {
622         *success = false;
623         return;
624     }
625     
626     /**
627      * Convert distances to similarities!!!!!!
628      */   
629     distMat->makeSimilarityMatrix();
630       
631     *success = true;
632
633 }
634
635 /**
636  * Mark 10-5-2007: Bug fix # 42
637  * The following function is to be used only for calculating the weights for the Qt
638  * low scoring segments.
639  */
640 void TreeInterface::getWeightsForQtLowScore(vector<int>* seqWeights, DistMatrix* distMat, 
641                                    Alignment *alignPtr, int seq1, int nSeqs, 
642                                    string* phylipName, bool* success)
643 {
644     string copyOfPhylipName = string(*phylipName);     
645     int _nSeqs = nSeqs;
646     int status = 0;
647     
648     generateTreeFromDistMatNJ(distMat, alignPtr, seq1, _nSeqs, phylipName, success);
649     
650     Tree groupTree;
651
652     status = 0;
653     if (_nSeqs >= 2)
654     {
655         status = groupTree.readTree(alignPtr, phylipName->c_str(), seq1 - 1,
656                                     seq1 + _nSeqs-1); // mark 22-5-07
657         if(status == 0)
658         {
659             *success = false;
660             return;
661         }
662         else
663         {
664             *success = true;
665         }
666     }
667
668     groupTree.calcSeqWeights(seq1 - 1, seq1 + _nSeqs - 1, seqWeights); // mark 23-5-07
669 }
670                                                            
671 }