4 * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson.
9 * 10-02-07,Nigel Brown(EMBL): changed ifstream to InFileStream to handle
10 * cross-platform end-of-lines (for dendrograms, not for help file). Remerged
11 * this change 13-04-07.
13 * Mark 10-5-2007: Bug fix # 42. call getWeightsForQtLowScore function in
14 * QTcalcWeightsForLowScoreSeg instead of getWeightsFromDistMat.
16 * Mark 22-5-07, changed the distmatrix to be the size of alignObject.numSeqs
17 * Mark Change 20-6-07, added call to calculateMaxLengths()
18 * Mark, 3-7-07, Changed getHelp.
28 #include "pairwise/FullPairwiseAlign.h"
29 #include "pairwise/FastPairwiseAlign.h"
30 #include "multipleAlign/MSA.h"
31 #include "multipleAlign/LowScoreSegProfile.h"
32 #include "multipleAlign/Iteration.h"
33 #include "tree/TreeInterface.h"
34 #include "general/debuglogObject.h"
35 #include "general/statsObject.h"
36 #include "alignment/ObjectiveScore.h"
37 #include "general/ClustalWResources.h"
49 helpFileName = string("clustalw.hlp");
51 helpFileName = string("clustalw_help");
56 sequencesMsg = "\nUse the existing GUIDE TREE file, ";
57 profile1Msg = "\nUse the existing GUIDE TREE file for Profile 1, " ;
58 profile2Msg = "\nUse the existing GUIDE TREE file for Profile 2, ";
60 newProfile1TreePrompt = "\nEnter name for new GUIDE TREE file for profile 1 [";
61 newProfile2TreePrompt = "\nEnter name for new GUIDE TREE file for profile 2 [";
66 /** ***********************************************************************
67 * The function align is used to do a full multiple sequence alignment. *
68 **************************************************************************/
69 // FIXME: merge doAlignUseOldTree in here
70 void Clustal::align(string* phylipName, bool createOutput)
74 //start = time (NULL);
75 //ObjectiveScore score;
76 //double _score = score.getSagaScore(&alignmentObj);
77 //cout << "SAGA score " << _score << "\n";
81 AlignmentOutput alignOutput;
83 if(userParameters->getEmpty() && userParameters->getMenuFlag())
85 utilityObject->error("No sequences in memory. Load sequences first.");
89 userParameters->setStructPenalties1(NONE);
90 userParameters->setStructPenalties2(NONE);
92 alignmentObj.clearSecStruct1();
93 alignmentObj.clearSecStruct2();
95 utilityObject->getPath(userParameters->getSeqName(), &path);
97 if(createOutput && (userParameters->getMenuFlag() || !userParameters->getInteractive()))
99 if(!alignOutput.openAlignmentOutput(path))
104 else if(createOutput)
106 // We are using clustalQT.
107 // Open all the files.
108 if(!alignOutput.QTOpenFilesForOutput(QTFileNames))
110 return; // could not open the files.
114 if (userParameters->getSaveParameters())
116 userParameters->createParameterOutput();
119 if(userParameters->getResetAlignmentsNew() || userParameters->getResetAlignmentsAll())
121 alignmentObj.resetAlign();
123 if(userParameters->getDisplayInfo())
125 cout << "Start of Pairwise alignments\n";
126 cout << "Aligning...\n";
128 if(userParameters->getDNAFlag())
130 userParameters->setDNAParams();
134 userParameters->setProtParams();
137 if (statsObject->isEnabled()) {
138 statsObject->logInputSeqStats(&alignmentObj);
142 /// STEP 1: PAIRWISE ALIGNMENT TO GENERATE DISTANCE MATRIX
145 int _numSeqs = alignmentObj.getNumSeqs();
146 distMat.ResizeRect(_numSeqs + 1);
148 PairwiseAlignBase* pairwiseDist;
149 if (userParameters->getQuickPairAlign())
151 pairwiseDist = new FastPairwiseAlign();
155 pairwiseDist = new FullPairwiseAlign();
157 // Generate distance matrix!
158 pairwiseDist->pairwiseAlign(&alignmentObj, &distMat, 0, _numSeqs, 0, _numSeqs);
161 bool success = false;
162 auto_ptr<AlignmentSteps> progSteps;
163 vector<int> seqWeight(_numSeqs + 1);
166 if(logObject && DEBUGLOG)
168 logObject->logMsg("Calling getWeightsAndStepsFromDistMat\n");
172 TreeInterface calcSteps;
173 progSteps = calcSteps.getWeightsAndStepsFromDistMat(&seqWeight, &distMat, &alignmentObj,
174 1, _numSeqs, phylipName, &success);
175 //cout << "weights and steps calculated!\n";
177 //dif = difftime(end, start);
178 //cout << "It took " << dif << " seconds so Far\n";
183 if(logObject && DEBUGLOG)
185 logObject->logMsg("Unsuccessful!!!\n");
191 if(logObject && DEBUGLOG)
193 logObject->logMsg("doing multiSeqAlign\n");
196 MSA* msaObj = new MSA();
197 count = msaObj->multiSeqAlign(&alignmentObj, &distMat, &seqWeight, progSteps.get(), 0);
199 //cout << "alignment finished!\n";
202 //dif = difftime(end, start);
203 //cout << "It took " << dif << " seconds so Far\n";
210 if (userParameters->getMenuFlag())
215 /// Do iteration to improve alignment!!!
216 if(userParameters->getDoRemoveFirstIteration() == ALIGNMENT)
218 //userParameters->setNumIterations(_numSeqs * 2);
219 Iteration iterateObj;
220 iterateObj.removeFirstIterate(&alignmentObj);
221 alignmentObj.calculateMaxLengths(); // Mark Change 20-6-07
222 if(userParameters->getDisplayInfo())
223 cout << "Finished iteration\n";
226 if (statsObject->isEnabled()) {
227 statsObject->logAlignedSeqStats(&alignmentObj);
232 /// STEP 4: OUTPUT THE ALIGNMENT
235 alignOutput.createAlignmentOutput(&alignmentObj, 1, _numSeqs);
240 //dif = difftime(end, start);
241 //cout << "It took " << dif << " seconds\n";
244 /** ****************************************************************************
245 * The function sequencesAlignToProfile is used to align a set of sequences to *
247 *******************************************************************************/
248 void Clustal::sequencesAlignToProfile(string* phylipName)
256 AlignmentOutput alignOutput;
258 if(userParameters->getProfile1Empty() && userParameters->getMenuFlag())
260 utilityObject->error("No profile in memory. Input 1st profile first.\n");
264 if(userParameters->getProfile2Empty() && userParameters->getMenuFlag())
266 utilityObject->error("No sequences in memory. Input sequences first.\n");
270 utilityObject->getPath(userParameters->getProfile2Name(), &path);
272 if(userParameters->getMenuFlag() || !userParameters->getInteractive())
274 if(!alignOutput.openAlignmentOutput(path))
281 // We are using clustalQT.
282 // Open all the files.
283 if(!alignOutput.QTOpenFilesForOutput(QTFileNames))
285 return; // could not open the files.
289 newSeq = alignmentObj.getProfile1NumSeqs() + 1;
291 // check for secondary structure information for list of sequences
293 saveSS2 = userParameters->getUseSS2();
295 if (userParameters->getStructPenalties2() != NONE && userParameters->getUseSS2() == true
296 && (alignmentObj.getNumSeqs() - alignmentObj.getProfile1NumSeqs() > 1))
298 if (userParameters->getStructPenalties2() == SECST)
300 utilityObject->warning("\n\nWARNING: ignoring secondary structure for a list of sequences\n\n");
302 else if (userParameters->getStructPenalties2() == GMASK)
304 utilityObject->warning("\n\nWARNING: ignoring gap penalty mask for a list of sequences\n\n");
306 userParameters->setUseSS2(false);
310 int _numSeqs = alignmentObj.getNumSeqs();
311 distMat.ResizeRect(_numSeqs + 1);
314 // Convert to similarities!!!!!!!!
315 // This is calcualting the similarities of the sequences in the profile part
317 for (i = 1; i <= newSeq; i++)
319 for (j = i + 1; j <= newSeq; j++)
321 dscore = alignmentObj.countid(i,j);
322 distMat(i, j) = ((double)100.0 - (double)dscore)/(double)100.0;
323 distMat(j, i) = distMat(i, j);
326 //distMat.printArray();
328 InFileStream _treeFile; //nige
332 // Note put this into a separate function!!!!!
336 useTree = useExistingGuideTree(Sequences, phylipName, path);
339 if (userParameters->getSaveParameters())
341 userParameters->createParameterOutput();
344 if(userParameters->getResetAlignmentsNew() || userParameters->getResetAlignmentsAll())
346 alignmentObj.resetProfile2();
350 alignmentObj.fixGaps();
354 if (userParameters->getStructPenalties1() == SECST)
356 _length = alignmentObj.getSeqLength(1);
357 calcGapPenaltyMask(_length, alignmentObj.getSecStructMask1(),
358 alignmentObj.getGapPenaltyMask1());
360 if (userParameters->getStructPenalties2() == SECST)
362 _length = alignmentObj.getSeqLength(alignmentObj.getProfile1NumSeqs() + 1);
363 calcGapPenaltyMask(_length, alignmentObj.getSecStructMask2(),
364 alignmentObj.getGapPenaltyMask2());
367 // PROGRESSIVE ALIGNMENT ALGORITHM //
368 vector<int> seqWeights(_numSeqs + 1);
370 bool success = false;
372 if (useTree == false) // create the new tree file, if necessary
374 if(userParameters->getDisplayInfo())
376 cout << "Start of Pairwise alignments\n";
377 cout << "Aligning...\n";
380 if(userParameters->getDNAFlag())
382 userParameters->setDNAParams();
386 userParameters->setProtParams();
389 // STEP 1: CALCULATE DISTANCE MATRIX USING PAIRWISE ALIGNMENT //
390 PairwiseAlignBase* pairwiseDist;
391 if (userParameters->getQuickPairAlign())
393 pairwiseDist = new FastPairwiseAlign();
397 pairwiseDist = new FullPairwiseAlign();
400 // Generate distance matrix!
401 pairwiseDist->pairwiseAlign(&alignmentObj, &distMat, 0, _numSeqs, newSeq-2, _numSeqs);
404 if(userParameters->getDisplayInfo())
407 TreeInterface calcSeqWeights;
408 calcSeqWeights.getWeightsFromDistMat(&seqWeights, &distMat, &alignmentObj, 1,
409 _numSeqs, phylipName, &success);
413 TreeInterface calcSeqWeights;
414 calcSeqWeights.getWeightsFromGuideTree(&alignmentObj, &distMat, phylipName,
415 &seqWeights, 1, _numSeqs, &success);
422 // If it is true, call the function here to get the seqweights etc from it.
424 // if users request only the guide tree, return
425 if (userParameters->getNewTreeFile())
431 MSA* msaObj = new MSA();
432 count = msaObj->seqsAlignToProfile(&alignmentObj, &distMat, &seqWeights, newSeq - 2,
436 userParameters->setUseSS2(saveSS2);
443 if (userParameters->getMenuFlag())
448 /// STEP 4: OUTPUT THE ALIGNMENT //
449 alignOutput.createAlignmentOutput(&alignmentObj, 1, _numSeqs);
454 /** ****************************************************************************
455 * The function profileAlign is used to align two profiles *
456 *******************************************************************************/
457 void Clustal::profileAlign(string* p1TreeName, string* p2TreeName)
461 bool useTree1, useTree2;
462 int count, i, j, dscore;
463 int _profile1NumSeqs = alignmentObj.getProfile1NumSeqs();
464 AlignmentOutput alignOutput;
466 if(userParameters->getProfile1Empty() || userParameters->getProfile2Empty())
468 utilityObject->error("No sequences in memory. Load sequences first.\n\n");
472 utilityObject->getPath(userParameters->getProfile1Name(), &path);
474 if(userParameters->getMenuFlag() || !userParameters->getInteractive())
476 if(!alignOutput.openAlignmentOutput(path))
483 // We are using clustalQT.
484 // Open all the files.
485 if(!alignOutput.QTOpenFilesForOutput(QTFileNames))
487 return; // could not open the files.
491 if(userParameters->getResetAlignmentsNew() || userParameters->getResetAlignmentsAll())
493 alignmentObj.resetProfile1();
494 alignmentObj.resetProfile2();
498 alignmentObj.fixGaps();
501 // Check if there exists a tree for profile1
504 if (_profile1NumSeqs >= 2)
506 useTree1 = useExistingGuideTree(Profile1, p1TreeName, path);
509 // Check if there exists a tree for profile2
512 utilityObject->getPath(userParameters->getProfile2Name(), &path);
514 if (alignmentObj.getNumSeqs() - _profile1NumSeqs >= 2)
516 useTree2 = useExistingGuideTree(Profile2, p2TreeName, path);
519 if (userParameters->getSaveParameters())
521 userParameters->createParameterOutput();
524 if (userParameters->getStructPenalties1() == SECST)
526 _length = alignmentObj.getSeqLength(1);
527 calcGapPenaltyMask(_length, alignmentObj.getSecStructMask1(),
528 alignmentObj.getGapPenaltyMask1());
531 if (userParameters->getStructPenalties2() == SECST)
533 _length = alignmentObj.getSeqLength(_profile1NumSeqs + 1);
534 calcGapPenaltyMask(_length, alignmentObj.getSecStructMask2(),
535 alignmentObj.getGapPenaltyMask2());
538 // Declare the distance matrix
540 int _numSeqs = alignmentObj.getNumSeqs();
541 distMat.ResizeRect(_numSeqs + 1);
543 if (useTree1 == false)
545 if (_profile1NumSeqs >= 2)
547 for (i = 1; i <= _profile1NumSeqs; i++)
549 for (j = i + 1; j <= _profile1NumSeqs; j++)
551 dscore = static_cast<int>(alignmentObj.countid(i,j));
552 distMat(i, j) = (100.0 - dscore)/100.0;
553 distMat(j, i) = distMat(i, j);
556 utilityObject->getPath(userParameters->getProfile1Name(), &path);
558 // We need to get the name of the file first because the message is different!
559 if(userParameters->getMenuFlag())
561 promptForNewGuideTreeName(Profile1, p1TreeName, path);
566 treeName = path + "dnd";
567 p1TreeName = new string(treeName);
571 if (useTree2 == false)
573 if(_numSeqs - _profile1NumSeqs >= 2)
575 for (i = 1 + _profile1NumSeqs; i <= _numSeqs; i++)
577 for (j = i + 1; j <= _numSeqs; j++)
579 dscore = static_cast<int>(alignmentObj.countid(i,j));
580 distMat(i, j) = (100.0 - dscore) / 100.0;
581 distMat(j, i) = distMat(i, j);
585 utilityObject->getPath(userParameters->getProfile2Name(), &path);
587 if(userParameters->getMenuFlag())
589 promptForNewGuideTreeName(Profile2, p2TreeName, path);
594 treeName = path + "dnd";
595 p2TreeName = new string(treeName);
602 bool success = false;
603 vector<int> prof1Weight, prof2Weight;
604 prof1Weight.resize(_profile1NumSeqs);
605 prof2Weight.resize(_numSeqs);
607 tree.getWeightsForProfileAlign(&alignmentObj, &distMat, p1TreeName, &prof1Weight,
608 p2TreeName, &prof2Weight, _numSeqs, _profile1NumSeqs,
609 useTree1, useTree2, &success);
615 // do an initial alignment to get the pairwise identities between the two
616 // profiles - used to set parameters for the final alignment
617 MSA* msaObj = new MSA();
619 alignmentObj.resetProfile1();
620 alignmentObj.resetProfile2();
622 count = msaObj->doProfileAlign(&alignmentObj, &distMat, &prof1Weight, &prof2Weight);
629 if(userParameters->getMenuFlag())
634 alignOutput.createAlignmentOutput(&alignmentObj, 1, _numSeqs);
640 void Clustal::doGuideTreeOnly(string* phylipName)
643 if(userParameters->getEmpty())
645 utilityObject->error("No sequences in memory. Load sequences first.\n");
649 userParameters->setStructPenalties1(NONE);
650 userParameters->setStructPenalties2(NONE);
652 alignmentObj.clearSecStruct1();
653 alignmentObj.clearSecStruct2();
655 if(userParameters->getResetAlignmentsNew() || userParameters->getResetAlignmentsAll())
657 alignmentObj.resetAlign();
660 utilityObject->getPath(userParameters->getSeqName(), &path);
661 int _numSeqs = alignmentObj.getNumSeqs();
665 utilityObject->error("Less than 2 sequences in memory. Phylogenetic tree cannot be built.\n");
669 if (userParameters->getSaveParameters())
671 userParameters->createParameterOutput();
674 if(userParameters->getDisplayInfo())
676 cout << "Start of Pairwise alignments\n";
677 cout << "Aligning...\n";
680 if(userParameters->getDNAFlag())
682 userParameters->setDNAParams();
686 userParameters->setProtParams();
689 ///STEP 1: PAIRWISE ALIGNMENT TO GENERATE DISTANCE MATRIX //
692 distMat.ResizeRect(_numSeqs + 1);
694 PairwiseAlignBase* pairwiseDist;
695 if (userParameters->getQuickPairAlign())
697 pairwiseDist = new FastPairwiseAlign();
701 pairwiseDist = new FullPairwiseAlign();
703 // Generate distance matrix!
704 pairwiseDist->pairwiseAlign(&alignmentObj, &distMat, 0, _numSeqs, 0, _numSeqs);
708 fprintf(stdout, "\nDEBUG: distance matrix following...\n");
709 distMat.printArray();
712 bool success = false;
714 tree.generateTreeFromDistMat(&distMat, &alignmentObj, 1, _numSeqs, phylipName, &success);
716 if(userParameters->getResetAlignmentsNew() || userParameters->getResetAlignmentsAll())
718 alignmentObj.resetAlign();
725 // FIXME this is to 90% identical with align(), please merge
726 void Clustal::doAlignUseOldTree(string* phylipName)
730 AlignmentOutput alignOutput;
732 if(userParameters->getEmpty())
734 utilityObject->error("No sequences in memory. Load sequences first.\n");
738 userParameters->setStructPenalties1(NONE);
739 userParameters->setStructPenalties2(NONE);
741 alignmentObj.clearSecStruct1();
742 alignmentObj.clearSecStruct2();
744 utilityObject->getPath(userParameters->getSeqName(), &path);
746 if(userParameters->getMenuFlag() || !userParameters->getInteractive())
748 if(!alignOutput.openAlignmentOutput(path))
755 // We are using clustalQT.
756 // Open all the files.
757 if(!alignOutput.QTOpenFilesForOutput(QTFileNames))
759 return; // could not open the files.
763 if(userParameters->getResetAlignmentsNew() || userParameters->getResetAlignmentsAll())
765 alignmentObj.resetAlign();
768 int _numSeqs = alignmentObj.getNumSeqs();
770 distMat.ResizeRect(_numSeqs + 1);
771 utilityObject->getPath(userParameters->getSeqName(), &path);
776 if(userParameters->getMenuFlag())
778 phylipName = new string(path);
779 *phylipName = *phylipName + "dnd";
781 string message, answer;
782 message = "\nEnter a name for the guide tree file [" + *phylipName + "]";
783 utilityObject->getStr(message, answer);
787 phylipName = new string(answer);
791 if(userParameters->getMenuFlag() || !userParameters->getInteractive())
793 InFileStream _treeFile; //nige
794 _treeFile.open(phylipName->c_str());
795 if(!_treeFile.is_open())
797 utilityObject->error("Cannot open tree file [%s]\n", phylipName->c_str());
805 if(userParameters->getDisplayInfo())
807 cout << "Start of Pairwise alignments\n";
808 cout << "Aligning...\n";
810 if(userParameters->getDNAFlag())
812 userParameters->setDNAParams();
816 userParameters->setProtParams();
819 PairwiseAlignBase* pairwiseDist;
820 if (userParameters->getQuickPairAlign())
822 pairwiseDist = new FastPairwiseAlign();
826 pairwiseDist = new FullPairwiseAlign();
828 // Generate distance matrix!
829 pairwiseDist->pairwiseAlign(&alignmentObj, &distMat, 0, _numSeqs, 0, _numSeqs);
833 if (userParameters->getSaveParameters())
835 userParameters->createParameterOutput();
837 auto_ptr<AlignmentSteps> progSteps;
838 vector<int> seqWeights(_numSeqs + 1);
839 bool success = false;
841 progSteps = tree.getWeightsAndStepsFromTree(&alignmentObj, &distMat, phylipName,
842 &seqWeights, 1, _numSeqs, &success);
849 MSA* msaObj = new MSA();
850 count = msaObj->multiSeqAlign(&alignmentObj, &distMat, &seqWeights, progSteps.get(), 0);
858 if (userParameters->getMenuFlag())
863 // same as in align()
864 if(userParameters->getDoRemoveFirstIteration() == ALIGNMENT)
866 //userParameters->setNumIterations(_numSeqs * 2);
867 Iteration iterateObj;
868 iterateObj.removeFirstIterate(&alignmentObj);
869 alignmentObj.calculateMaxLengths(); // Mark Change 20-6-07
870 if(userParameters->getDisplayInfo())
871 cout << "Finished iteration\n";
875 alignOutput.createAlignmentOutput(&alignmentObj, 1, _numSeqs);
877 phylipName = new string("");
882 void Clustal::getFullHelp()
884 vector<string> markers;
886 bool showtitle = true;
888 markers = myhelp.ListSectionMarkers();
889 for (unsigned int i=0; i<markers.size(); i++) {
890 string m = markers[i];
891 getHelp(m, showtitle);
895 void Clustal::getHelp(char helpPointer, bool printTitle)
899 Clustal::getHelp(s, printTitle);
905 * Andreas Wilm (UCD): edited to support new help system (separate
906 * file now which is compiled in)
909 * The clustal help file is called clustalw_help. Should be in the same
910 * directory. I have changed it to a C++ style implementation. I am taking
911 * out support for VMS until later. We will see if we are still supporting that platform.
913 void Clustal::getHelp(string helpPointer, bool printTitle)
918 helpString = myhelp.GetSection(helpPointer);
921 helpString = "\n\n>> HELP " +
924 myhelp.GetSectionTitle(helpPointer) +
930 if(! userParameters->getMenuFlag())
936 string::size_type lastPos = 0;
937 string::size_type pos = helpString.find_first_of("\n", lastPos);
940 while (pos != string::npos)
942 cout << helpString.substr(lastPos, pos - lastPos);
945 if(nlines >= PAGE_LEN)
948 cout << "\nPress [RETURN] to continue or X to stop ";
950 if(toupper(tempChar) == 'X')
960 lastPos = pos; //helpString.find_first_not_of("\n", pos);
961 pos = helpString.find_first_of("\n", lastPos+1);
962 //cerr << "DEBUG: pos=" << pos << " lastPos=" << lastPos << "/" << helpString.length() << "\n";
970 * The wrap functions will be used with interface classes to perform the tasks
971 * that were previously done there.
973 int Clustal::sequenceInput(bool append, string *offendingSeq)
976 // If we are not appending, we need to clear the Alignment object.
979 alignmentObj.clearAlignment();
982 FileReader readSeqFile;
983 code = readSeqFile.seqInput(&alignmentObj, append, offendingSeq);
988 * profile1Input is a wrapper function for the profileInput. This is because the
989 * other classes dont have access to FileReader
991 int Clustal::profile1Input(string profile1Name)
994 // I need to clear out the Alignment object.
995 alignmentObj.clearAlignment();
996 userParameters->setProfileNum(1);
998 userParameters->setSeqName(profile1Name);
999 userParameters->setProfile1Name(profile1Name);
1001 FileReader readProfileFile;
1002 code = readProfileFile.profileInput(&alignmentObj);
1004 // If we are using the commandline check if there are seqs!
1005 if(!userParameters->getInteractive())
1007 // AW: FIXME code should be handled higher up the stack and check all codes
1008 // also, shouldnt we use utilityObject->error()?
1011 if (code==NOSEQUENCESINFILE)
1012 cerr << "ERROR: There are no sequences in profile2 file." << std::endl;
1013 else if (code==ALLNAMESNOTDIFFERENT)
1014 cerr << "ERROR: Not all sequence names are different" << std::endl;
1016 cerr << "ERROR: Unhandled error code (" << code << ") returned from profileInput.\n";
1017 // AW: should we really exit here? What if called from clustalx?
1026 * profile2Input is a wrapper function for the profileInput. This is because the
1027 * other classes dont have access to FileReader
1029 int Clustal::profile2Input(string profile2Name)
1033 if(userParameters->getProfileNum() == 2)
1035 // Remove the sequences from the previous one.
1036 int numSeqsProfile1 = alignmentObj.getProfile1NumSeqs();
1037 alignmentObj.resizeSeqArray(numSeqsProfile1 + 1);
1038 // Clear anything from profile2 in alignment.
1039 alignmentObj.clearSecStruct2();
1041 userParameters->setProfileNum(2);
1043 userParameters->setSeqName(profile2Name);
1044 userParameters->setProfile2Name(profile2Name);
1046 FileReader readProfileFile;
1047 code = readProfileFile.profileInput(&alignmentObj);
1049 if(!userParameters->getInteractive())
1051 // AW: FIXME code should be handled higher up the stack and check all codes
1052 // also, shouldnt we use utilityObject->error()?
1054 if (code==NOSEQUENCESINFILE)
1055 cerr << "ERROR: There are no sequences in profile2 file." << std::endl;
1056 else if (code==ALLNAMESNOTDIFFERENT)
1057 cerr << "ERROR: Not all sequence names are different" << std::endl;
1059 cerr << "ERROR: Unhandled error code (" << code << ") returned from profileInput.\n";
1060 // AW: should we really exit here? What if called from clustalx?
1069 * The function commandline_readseq is called by the command line to
1070 * read in the sequences. It also prints out the names. This was
1071 * previously done by the command line parser.
1073 int Clustal::commandLineReadSeq(int firstSeq)
1075 // Clear the alignment, although obviously there shouldnt be anything in it.
1076 alignmentObj.clearAlignment();
1077 userParameters->setProfileNum(0);
1079 string offendingSeq;
1080 FileReader readInputFile;
1081 code = readInputFile.readSeqs(&alignmentObj, firstSeq, &offendingSeq);
1085 if(code == CANNOTOPENFILE)
1087 utilityObject->error("Cannot open input file. No alignment!\n");
1089 else if(code == NOSEQUENCESINFILE)
1091 utilityObject->error("No sequences in file. No alignment!\n");
1093 else if(code == ALLNAMESNOTDIFFERENT)
1095 utilityObject->error("Multiple sequences found with same name (found %s at least twice)!", offendingSeq.c_str());
1097 else if(code == EMPTYSEQUENCE)
1099 utilityObject->error("Empty sequences found: %s\n", offendingSeq.c_str());
1101 else if(code == SEQUENCETOOBIG)
1103 utilityObject->error("Sequence(s) too big: %s\n", offendingSeq.c_str());
1105 else if(code == BADFORMAT)
1107 utilityObject->error("Sequences are badly formatted!\n");
1111 utilityObject->error("\nThere was a problem reading in the file. No alignment!\n");
1116 alignmentObj.printSequencesAddedInfo();
1118 userParameters->setEmpty(false);
1123 * The function outputNow is used to output the alignment. It can be called by the
1124 * menu or command line parser.
1126 void Clustal::outputNow()
1128 if(alignmentObj.getNumSeqs() > 0)
1131 if(!userParameters->getMenuFlag())
1133 string _seqName = userParameters->getSeqName();
1134 utilityObject->getPath(_seqName, &path);
1136 AlignmentOutput alignmentOutput;
1137 alignmentOutput.openAlignmentOutput(path);
1139 alignmentOutput.createAlignmentOutput(&alignmentObj, 1, alignmentObj.getNumSeqs());
1143 utilityObject->error("No sequences have been loaded\n");
1147 void Clustal::phylogeneticTree(string* phylipName, string* clustalName, string* distName,
1148 string* nexusName, string pimName)
1150 TreeNames treeNames;
1151 treeNames.clustalName = *clustalName;
1152 treeNames.distName = *distName;
1153 treeNames.nexusName = *nexusName;
1154 treeNames.phylipName = *phylipName;
1155 treeNames.pimName = pimName;
1157 tree.treeFromAlignment(&treeNames, &alignmentObj);
1160 void Clustal::bootstrapTree(string* phylipName, string* clustalName, string* nexusName)
1162 TreeNames treeNames;
1163 treeNames.clustalName = *clustalName;
1164 treeNames.nexusName = *nexusName;
1165 treeNames.phylipName = *phylipName;
1167 tree.bootstrapTree(&treeNames, &alignmentObj);
1170 void Clustal::initInterface()
1172 userParameters->setEmpty(true);
1173 userParameters->setProfile1Empty(true);
1174 userParameters->setProfile2Empty(true);
1178 * The function calcGapPenaltyMask is used to calculate the gapMask from the secondary
1179 * structure information.
1181 void Clustal::calcGapPenaltyMask(int prfLength, vector<char>* mask, vector<char>* gapMask)
1185 vector<char> structMask;
1186 structMask.resize(prfLength + 1);
1188 int _helixEndPlus = userParameters->getHelixEndPlus();
1189 int _helixEndMinus = userParameters->getHelixEndMinus();
1190 int _strandEndPlus = userParameters->getStrandEndPlus();
1191 int _strandEndMinus = userParameters->getStrandEndMinus();
1194 while (i < prfLength)
1196 if (tolower((*mask)[i]) == 'a' || (*mask)[i] == '$')
1198 for (j = -_helixEndPlus; j < 0; j++)
1200 if(i + j < (int)structMask.size())
1202 if ((i + j >= 0) && (tolower(structMask[i + j]) != 'a') &&
1203 (tolower(structMask[i + j]) != 'b'))
1205 structMask[i + j] = 'a';
1209 for (j = 0; j < _helixEndMinus; j++)
1211 if (i + j >= prfLength || (tolower((*mask)[i + j]) != 'a'
1212 && (*mask)[i + j] != '$'))
1216 structMask[i + j] = 'a';
1219 while (tolower((*mask)[i]) == 'a' || (*mask)[i] == '$')
1225 if ((*mask)[i] == '$')
1227 structMask[i] = 'A';
1233 structMask[i] = (*mask)[i];
1237 for (j = 0; j < _helixEndMinus; j++)
1239 if ((i - j - 1 >= 0) && (tolower((*mask)[i - j - 1]) == 'a'
1240 || (*mask)[i - j - 1] == '$'))
1242 structMask[i - j - 1] = 'a';
1245 for (j = 0; j < _helixEndPlus; j++)
1247 if (i + j >= prfLength)
1251 structMask[i+j] = 'a';
1254 else if (tolower((*mask)[i]) == 'b' || (*mask)[i] == '%')
1256 for (j = -_strandEndPlus; j < 0; j++)
1258 if ((i + j >= 0) && (tolower(structMask[i + j]) != 'a')
1259 && (tolower(structMask[i + j]) != 'b'))
1261 structMask[i + j] = 'b';
1264 for (j = 0; j < _strandEndPlus; j++)
1266 if (i + j >= prfLength || (tolower((*mask)[i + j]) != 'b'
1267 && (*mask)[i + j] != '%'))
1271 structMask[i+j] = 'b';
1274 while (tolower((*mask)[i]) == 'b' || (*mask)[i] == '%')
1280 if ((*mask)[i] == '%')
1282 structMask[i] = 'B';
1288 structMask[i] = (*mask)[i];
1292 for (j = 0; j < _strandEndMinus; j++)
1294 if ((i-j-1>=0) && (tolower((*mask)[i-j-1]) == 'b' || (*mask)[i-j-1] == '%'))
1296 structMask[i - j - 1] = 'b';
1299 for (j = 0; j < _strandEndPlus; j++)
1301 if (i + j >= prfLength)
1305 structMask[i+j] = 'b';
1314 for(i = 0; i < prfLength;i++)
1316 switch (structMask[i])
1319 (*gapMask)[i] = userParameters->getHelixPenalty() + '0';
1322 (*gapMask)[i] = userParameters->getHelixEndPenalty() + '0';
1325 (*gapMask)[i] = userParameters->getStrandPenalty() +'0';
1328 (*gapMask)[i] = userParameters->getStrandEndPenalty() + '0';
1331 (*gapMask)[i] = userParameters->getLoopPenalty() + '0';
1339 * The function QTcalcLowScoreSegments is used to calculate the residues in the sequences
1341 * @param firstSeq first seq in the alignment or profile
1342 * @param nSeqs the number of sequences
1343 * @param nCols the length of the longest seq
1344 * @param seqWeight a vector to hold the sequence weights
1345 * @param lowScoreRes
1346 * @param seqWeightCalculated
1348 void Clustal::QTcalcLowScoreSegments(LowScoreSegParams* params)
1356 int matrix[NUMRES][NUMRES];
1359 vector<float> pscore;
1360 int _maxAA = userParameters->getMaxAA();
1362 // STEP 1: Calculate the sequence weights
1363 QTcalcWeightsForLowScoreSeg(params);
1365 subMatrix->getQTMatrixForLowScoreSeg(matrix);
1367 const SeqArray* seqArray = alignmentObj.getSeqArray();
1369 gaps.resize(params->nCols + 1);
1371 for (j = 1; j <= params->nCols; j++)
1374 for(i = params->firstSeq + 1; i < params->firstSeq + params->nSeqs; i++)
1376 if (j < alignmentObj.getSeqLength(i))
1378 if (((*seqArray)[i][j] < 0) || ((*seqArray)[i][j] > _maxAA))
1386 // STEP 2: Calculate the profile
1387 LowScoreSegProfile lowScoreProfile(params->nCols, params->firstSeq,
1388 params->firstSeq + params->nSeqs);
1390 weight.resize(params->firstSeq + params->nSeqs + 1);
1392 for(i = params->firstSeq;i < params->firstSeq + params->nSeqs;i++)
1394 weight[i] = (*(params->seqWeight))[i - params->firstSeq];
1397 lowScoreProfile.calcLowScoreSegProfile(seqArray, matrix, &weight);
1399 const SeqArray* profile = lowScoreProfile.getProfilePtr();
1402 for(i = params->firstSeq; i < params->firstSeq + params->nSeqs; i++)
1404 sweight += weight[i];
1407 //Now, use the profile scores to mark segments of each sequence which score badly.
1409 fsum.resize(params->nCols + 2);
1410 bsum.resize(params->nCols + 2);
1411 pscore.resize(params->nCols + 2);
1413 for(i = params->firstSeq + 1; i < params->firstSeq + params->nSeqs + 1; i++)
1415 // In a forward phase, sum the profile scores. Mark negative sums as exceptions.
1416 //If the sum is positive, then it gets reset to 0.
1418 for(j = 1; j <= alignmentObj.getSeqLength(i); j++)
1420 gscale = (float)(params->nSeqs - gaps[j - 1]) / (float)params->nSeqs;
1421 if((*seqArray)[i][j] < 0 || (*seqArray)[i][j] >= _maxAA)
1423 pscore[j - 1] = 0.0;
1428 pscore[j-1]=((*profile)[j][(*seqArray)[i][j]]- weight[i - 1] *
1429 matrix[(*seqArray)[i][j]][(*seqArray)[i][j]]) * gscale / sweight;
1431 sum += pscore[j - 1];
1438 // trim off any positive scoring residues from the end of the segments
1440 for(j = alignmentObj.getSeqLength(i) - 1; j >= 0; j--)
1442 if(prevSum >= 0.0 && fsum[j] < 0.0 && pscore[j] >= 0.0)
1449 // Now, in a backward phase, do the same summing process.
1451 for(j = alignmentObj.getSeqLength(i); j >= 1; j--)
1453 if((*seqArray)[i][j] < 0 || (*seqArray)[i][j] >= _maxAA)
1459 sum += pscore[j - 1];
1467 // trim off any positive scoring residues from the start of the segments
1469 for(j = 0; j < alignmentObj.getSeqLength(i); j++)
1471 if(prevSum >= 0.0 && bsum[j] < 0.0 && pscore[j] >= 0.0)
1477 //Mark residues as exceptions if they score negative in the forward AND backward directions.
1478 for(j = 1; j <= alignmentObj.getSeqLength(i); j++)
1480 if(fsum[j - 1] < 0.0 && bsum[j - 1] < 0.0)
1482 if((*seqArray)[i][j] >= 0 && (*seqArray)[i][j] < _maxAA)
1484 (*(params->lowScoreRes))[i - params->firstSeq - 1][j - 1] = -1;
1490 // Finally, apply the length cutoff to the segments - removing segments shorter
1493 QTremoveShortSegments(params);
1497 void Clustal::QTremoveShortSegments(LowScoreSegParams* params)
1502 //GetPanelExtra(p,&data);
1503 if(params->nSeqs <= 0)
1506 // Reset all the exceptions - a value of 1 indicates an exception that
1507 // will be displayed. A value of -1 is used to remember exceptions that
1508 // are temporarily hidden in the display
1510 for(i = 0; i < params->nSeqs; i++)
1512 for(j = 0; j < params->nCols; j++)
1514 if((*(params->lowScoreRes))[i][j] == -1)
1516 (*(params->lowScoreRes))[i][j] = 1;
1521 for(i = 0; i < params->nSeqs; i++)
1524 for(j = 0; j <= params->nCols; j++)
1528 if((*(params->lowScoreRes))[i][j] == 1)
1533 if(j == params->nCols || (*(params->lowScoreRes))[i][j] == 0)
1535 if(j - start < userParameters->getQTminLenLowScoreSegment())
1537 for(k = start; k < j; k++)
1539 (*(params->lowScoreRes))[i][k] = -1;
1550 * Change: Mark 22-5-07, changed the distmatrix to be the size of alignObject.numSeqs
1552 void Clustal::QTcalcWeightsForLowScoreSeg(LowScoreSegParams* params)
1557 DistMatrix distMat(alignmentObj.getNumSeqs() + 1); // Mark: changed size
1558 // Aw potential trouble here: what if we don't have write
1559 // permission to current directory?
1561 char treeName[FILENAMELEN]=".score.ph";
1563 char treeName[FILENAMELEN]="tmp.ph";
1566 if(params->nSeqs <= 0)
1571 // if sequence weights have been calculated before - don't bother
1572 //doing it again (it takes too long). data.seqweight is set to NULL when
1573 // new sequences are loaded. /
1574 if(params->seqWeightCalculated == true)
1579 utilityObject->info("Calculating sequence weights...");
1581 // count pairwise percent identities to make a phylogenetic tree //
1582 if(params->nSeqs >= 2)
1583 { //i=firstSeq + 1; i <= firstSeq + nSeqs
1584 for (i = params->firstSeq + 1; i <= params->firstSeq + params->nSeqs; i++)
1585 { //j=i + 1; i <= firstSeq + nSeqs
1586 for (j = i + 1; j <= params->firstSeq + params->nSeqs; j++) // Mark 22-5-07
1588 dscore = alignmentObj.countid(i, j); // Mark 22-5-07
1589 distMat(i, j) = (100.0 - dscore) / 100.0;
1590 distMat(j, i) = distMat(i, j);
1593 string name = string(treeName);
1594 bool success = false;
1595 weight.resize(params->firstSeq + params->nSeqs + 1);
1597 tree.getWeightsForQtLowScore(&weight, &distMat, &alignmentObj, params->firstSeq + 1,
1598 params->nSeqs, &name, &success); // Mark change 10-5-07
1604 for(i = params->firstSeq;i < params->firstSeq + params->nSeqs;i++)
1606 (*(params->seqWeight))[i - params->firstSeq] = weight[i];
1609 utilityObject->info("Done.");
1613 void Clustal::QTSetFileNamesForOutput(AlignmentFileNames fileNames)
1615 QTFileNames = fileNames;
1618 bool Clustal::QTRealignSelectedRange(AlignmentFileNames fileNames, int beginPos, int endPos, bool realignEndGapPen)
1620 bool alignEndGapPen = userParameters->getEndGapPenalties();
1622 Alignment saveOldAlign = alignmentObj; // Take a copy of it. Note provided copy
1623 // constructor is ok.
1625 ok = alignmentObj.removeAllOutsideRange(beginPos, endPos);
1629 alignmentObj = saveOldAlign;
1632 // Temporarily set the alignment output to be input
1633 int saveOutOrder = userParameters->getOutputOrder();
1634 userParameters->setOutputOrder(INPUT);
1636 //set end gap penalties to be realignEndGapPenalties
1637 userParameters->setEndGapPenalties(realignEndGapPen);
1639 if(alignmentObj.getNumSeqs() <= 0)
1641 alignmentObj = saveOldAlign;
1644 QTSetFileNamesForOutput(fileNames);
1645 string phylipName = fileNames.treeFile;
1646 align(&phylipName, false);
1648 userParameters->setOutputOrder(saveOutOrder);
1650 // reset the end gap penalties
1651 userParameters->setEndGapPenalties(alignEndGapPen);
1653 // remove postions that only contain gaps
1654 int nSeqs = alignmentObj.getNumSeqs();
1655 alignmentObj.removeAllGapOnlyColumns(1, nSeqs, 0);
1657 // save it to a temporary area.
1658 SeqArray realignedArea = *(alignmentObj.getSeqArray());
1660 // Paste it back into the original alignment.
1661 alignmentObj = saveOldAlign;
1663 result = alignmentObj.updateRealignedRange(realignedArea, beginPos, endPos);
1666 utilityObject->error("something went wrong while updating the realigned range\n");
1669 // output the alignments
1670 AlignmentOutput alignOutput;
1671 if(!alignOutput.QTOpenFilesForOutput(QTFileNames))
1673 return false; // could not open the files.
1676 alignOutput.createAlignmentOutput(&alignmentObj, 1, nSeqs);
1681 void Clustal::test()
1683 cout << "RUNNING TEST\n";
1684 AlignmentOutput alignOutput;
1686 utilityObject->getPath(userParameters->getSeqName(), &path);
1688 if(!alignOutput.openAlignmentOutput(path))
1690 cerr << "could not open the file\n";
1694 vector<int> selected;
1695 int nSeqs = alignmentObj.getNumSeqs();
1696 selected.resize(nSeqs + 1, 0);
1700 alignmentObj.removeGapOnlyColsFromSelectedSeqs(&selected);
1701 alignOutput.createAlignmentOutput(&alignmentObj, 1, nSeqs);
1705 * This function is used to ask the user if they would like to use an existing guide tree.
1706 * Note: It expects that phylipName actually points to a string that has been allocated.
1708 bool Clustal::useExistingGuideTree(int type, string* phylipName, const string& path)
1710 bool useTree = false;
1712 InFileStream _treeFile; //nige
1716 if(type == Sequences)
1718 ptrToMsg = &sequencesMsg;
1719 paramUseTree = userParameters->getUseTreeFile();
1721 else if(type == Profile1)
1723 ptrToMsg = &profile1Msg;
1724 paramUseTree = userParameters->getUseTree1File();;
1726 else if(type == Profile2)
1728 ptrToMsg = &profile2Msg;
1729 paramUseTree = userParameters->getUseTree2File();;
1733 ptrToMsg = &sequencesMsg;
1734 paramUseTree = userParameters->getUseTreeFile();
1737 if (checkTree && userParameters->getMenuFlag())
1739 treeName = path + "dnd";
1741 _treeFile.open(treeName.c_str());
1742 _treeFile.seekg(0, std::ios::beg);
1744 if(_treeFile.is_open())
1746 string message = *ptrToMsg + treeName + " (y/n) ? [y]";
1748 utilityObject->getStr(message, answer);
1750 if(answer[0] != 'n' && answer[0] != 'N')
1754 phylipName = new string(treeName);
1758 *phylipName = treeName;
1765 else if (!userParameters->getMenuFlag() && paramUseTree)
1773 void Clustal::promptForNewGuideTreeName(int type, string* treeName, const string& path)
1776 if(type == Profile1)
1778 ptrToMsg = &newProfile1TreePrompt;
1780 else if(type == Profile2)
1782 ptrToMsg = &newProfile2TreePrompt;
1786 ptrToMsg = &newProfile1TreePrompt;
1791 treeName = new string("");
1794 while(treeName->empty())
1796 string message = *ptrToMsg + path + "dnd]";
1798 utilityObject->getStr(message, answer);
1801 answer = path + "dnd";