4 * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson.
7 * Mark 30-5-2007: Changed iterationOnTreeNode function as it was adding in extra gaps
8 * at the end of an alignment.
13 #include "Iteration.h"
14 #include "../alignment/ObjectiveScore.h"
15 #include "../general/utils.h"
16 #include "../general/userparams.h"
17 #include "../tree/TreeInterface.h"
18 #include "../clustalw_version.h"
24 bool Iteration::iterationOnTreeNode(int numSeqsProf1, int numSeqsProf2, int& prfLength1,
25 int& prfLength2, SeqArray* seqArray)
27 Alignment alignmentToIterate;
28 int numSeqsInProfiles = numSeqsProf1 + numSeqsProf2;
30 if(numSeqsInProfiles <= 2)
36 profileSeqs.resize(numSeqsInProfiles + 1);
39 for (int j = 0; ((j < numSeqsProf1 + numSeqsProf2) &&
40 (j < (int)seqArray->size())); j++)
42 profileSeqs[j + 1].clear();
43 profileSeqs[j + 1].resize(prfLength1 + 1);
44 for (int i = 0; i < prfLength1 && i < (int)(*seqArray)[j].size(); i++)
46 profileSeqs[j + 1][i + 1] = (*seqArray)[j][i];
50 alignmentToIterate.addSequences(&profileSeqs);
51 //userParameters->setNumIterations(numSeqsInProfiles * 2);
54 changed = removeFirstIterate(&alignmentToIterate);
58 SeqArray* iteratedSeqs = alignmentToIterate.getSeqArrayForRealloc();
59 string aaCodes = userParameters->getAminoAcidCodes();
61 int newPrf1Length = 0, newPrf2Length = 0;
63 for (int j = 0; j < numSeqsProf1 + numSeqsProf2; j++)
67 if(alignmentToIterate.getSeqLength(j + 1) > newPrf1Length)
69 newPrf1Length = alignmentToIterate.getSeqLength(j + 1);
72 else if(j < numSeqsProf1 + numSeqsProf2)
74 if(alignmentToIterate.getSeqLength(j + 1) > newPrf2Length)
76 newPrf2Length = alignmentToIterate.getSeqLength(j + 1);
81 prfLength1 = newPrf1Length; // mark 30-5-2007
82 prfLength2 = newPrf2Length; // mark 30-5-2007
84 for (int j = 0; j < numSeqsProf1 + numSeqsProf2; j++)
86 // I need to recalculate the prfLength1 and prfLength2
87 (*seqArray)[j].clear();
88 (*seqArray)[j].assign((*iteratedSeqs)[j + 1].begin() + 1,
89 (*iteratedSeqs)[j + 1].end());
90 (*seqArray)[j].resize(prfLength1 + extraEndElemNum, 31);
91 (*seqArray)[j][prfLength1] = ENDALN;
98 void Iteration::printSeqArray(SeqArray* arrayToPrint)
100 cout << "HERE IS THE SEQARRAY\n";
101 // I need to use iterators for everything here.
102 SeqArray::iterator mainBeginIt = arrayToPrint->begin();
103 SeqArray::iterator mainEndIt = arrayToPrint->end();
104 vector<int>::iterator begin, end;
105 string aaCodes = userParameters->getAminoAcidCodes();
107 for(; mainBeginIt != mainEndIt; mainBeginIt++)
109 if(mainBeginIt->size() > 0)
111 begin = mainBeginIt->begin() + 1;
112 end = mainBeginIt->end();
113 for(; begin != end; begin++)
115 if(*begin < (int)aaCodes.size())
117 cout << aaCodes[*begin];
131 * The function removeFirstIterate is used to perform the remove first iteration
132 * strategy on the finished alignment. It optimises the score give in alignScore.
133 * For iter = 1 to numIterations
134 * For seq i = 1 to numSeqs
136 * if either of the profiles has all gaps, remove this column.
137 * realign using profileAlign
138 * if its better, keep it. If its not better, dont keep it.
139 * @param alnPtr The alignment object.
140 * @return true if it has been successful, false if it has not been successful.
142 bool Iteration::removeFirstIterate(Alignment* alnPtr)
152 int nSeqs = alnPtr->getNumSeqs();
159 distMat.ResizeRect(nSeqs + 1);
161 ObjectiveScore scoreObj;
162 int iterate = userParameters->getDoRemoveFirstIteration();
163 userParameters->setDoRemoveFirstIteration(NONE);
165 double firstScore = scoreObj.getScore(alnPtr);
166 //cout << "firstScore = " << firstScore << "\n";
168 double bestScore = firstScore;
171 bool scoreImproved = false;
172 bool scoreImprovedAnyIteration = false;
173 int prof1NumSeqs = 1;
175 // This will be used for removing gaps!!!
176 vector<int> profile1;
177 vector<int> profile2;
178 profile1.resize(nSeqs + 1, 0);
180 profile2.resize(nSeqs + 1, 1);
183 vector<int> prof1Weight, prof2Weight;
184 int iterations = userParameters->getNumIterations();
185 //cout << "Max num iterations = " << iterations << "\n";
186 Alignment bestAlignSoFar;
188 // One iteration consists of removing each of the sequences, reseting all the gap
189 // only columns. If the score is better, the new alignment is kept.
190 for(int n = 1; n <= iterations; n++)
192 scoreImproved = false;
193 cout << "ITERATION " << n << " OF " << iterations << "\n";
194 for(int i = 1; i <= nSeqs; i++)
196 vector<Sequence> seqVector;
197 Alignment iterateAlign = *alnPtr;
199 iterateAlign.setProfile1NumSeqs(1);
201 // We remove the sequence i from the profile, and paste into the first position
202 // This is to make it easy to do the profile alignment.
203 vector<int> selected;
204 selected.resize(nSeqs + 1, 0);
206 seqVector = iterateAlign.cutSelectedSequencesFromAlignment(&selected);
207 iterateAlign.pasteSequencesIntoPosition(&seqVector, 0);
209 // Remove any gap only columns
210 iterateAlign.removeGapOnlyColsFromSelectedSeqs(&profile1);
211 iterateAlign.removeGapOnlyColsFromSelectedSeqs(&profile2);
213 // Calculate a simple distance matrix.
216 for (int i = 1; i <= nSeqs; i++)
218 for (int j = i + 1; j <= nSeqs; j++)
220 dscore = iterateAlign.countid(i, j);
221 distMat(i, j) = (100.0 - dscore)/100.0;
225 /* temporary tree file
227 * can't use the safer mkstemp function here, because
228 * we just pass down the filename :(
230 char buffer[L_tmpnam];
232 p2TreeName = buffer + string(".dnd");
233 // should test here if file is writable
235 bool success = false;
237 prof1Weight.resize(prof1NumSeqs);
239 prof2Weight.resize(nSeqs);
241 tree.getWeightsForProfileAlign(&iterateAlign, &distMat, &p1TreeName, &prof1Weight,
242 &p2TreeName, &prof2Weight, nSeqs, prof1NumSeqs,
243 false, false, &success);
244 remove(p2TreeName.c_str());
247 /* returning false only means alignment hasn't
248 * changed, but here getWeightsForProfileAlign failed,
249 * most likely because p2TreeName couldn't be read. an
250 * error will be printed to console. clustalw should
251 * then exit, FIXME: clustalx users have to sit
252 * through all error messages until someone
253 * implements a way to return an exit code and react
256 // does anyone know how to use
257 // (userParameters->getMenuFlag() ||
258 // !userParameters->getInteractive() instead?
260 utilityObject->myname(buf);
261 if (strcasecmp(buf, "clustalw")==0) {
264 // the next two lines were here before the exit
265 // was added. keeping it for clustalx although it
266 // doesnt seem to make any sens
267 userParameters->setDoRemoveFirstIteration(iterate);
272 MSA* msaObj = new MSA();
274 iterateAlign.resetProfile1();
275 iterateAlign.resetProfile2();
276 // Do the profile alignment.
277 count = msaObj->doProfileAlign(&iterateAlign, &distMat,
278 &prof1Weight, &prof2Weight);
280 // Check if its better
281 score = scoreObj.getScore(&iterateAlign);
282 iterateAlign.setProfile1NumSeqs(0);
284 if(score < bestScore) // Might be a problem with this.
286 //cout << "**********************************************\n";
287 //cout << "***** Better score found using iteration *****\n";
288 //cout << "**********************************************\n";
290 bestAlignSoFar = iterateAlign;
291 scoreImproved = true;
292 scoreImprovedAnyIteration = true;
294 distMat.clearArray();
295 distMat.ResizeRect(nSeqs + 1);
297 if(scoreImproved == false)
299 cout << "Score was not improved in last iteration. Exiting...\n";
305 // NOTE if we have improved it, then we need to update the sequences in alnPtr
306 // 1) get the unique id of seq i
307 // 2) get the sequence from new object using id
308 // 3) update the sequence in alnPtr
310 if(scoreImprovedAnyIteration) // If we need to update the alnPtr object.
312 cout << "Iteration improved Align score: " << bestScore << "\n";
314 const vector<int>* improvedSeq;
315 for(int i = 1; i <= nSeqs; i++) // For each seq in alnPtr
317 seqId = alnPtr->getUniqueId(i);
318 improvedSeq = bestAlignSoFar.getSequenceFromUniqueId(seqId);
319 alnPtr->updateSequence(i, improvedSeq);
322 cout << "FINAL score: " << bestScore << "\n";
323 userParameters->setDoRemoveFirstIteration(iterate);
324 return true; // It was successful.