4 * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson.
10 #include "FastPairwiseAlign.h"
15 FastPairwiseAlign::FastPairwiseAlign()
20 void FastPairwiseAlign::pairwiseAlign(Alignment *alignPtr, DistMatrix *distMat, int iStart,
21 int iEnd, int jStart, int jEnd)
25 if(distMat->getSize() != alignPtr->getNumSeqs() + 1)
27 cerr << "The distance matrix is not the right size!\n"
28 << "Need to terminate program.\n";
31 if((iStart < 0) || (iEnd < iStart) || (jStart < 0) || (jEnd < jStart))
33 cout << "The range for pairwise Alignment is incorrect.\n"
34 << "Need to terminate program.\n";
40 bool _DNAFlag = userParameters->getDNAFlag();
41 _maxAlnLength = alignPtr->getMaxAlnLength();
42 int num = (2 * _maxAlnLength) + 1;
43 accum.ResizeRect(5, num);
47 diagIndex.resize(num);
49 zza.resize(_maxAlnLength + 1);
50 zzb.resize(_maxAlnLength + 1);
51 zzc.resize(_maxAlnLength + 1);
52 zzd.resize(_maxAlnLength + 1);
56 userParameters->setDNAParams();
60 userParameters->setProtParams();
65 for (i = iStart + 1; i <= iEnd; ++i)
67 const vector<int>* _seqIPtr = alignPtr->getSequence(i);
68 int _seqILength = alignPtr->getSeqLength(i);
71 makeNPtrs(zza, zzc, _seqIPtr, _seqILength);
75 makePPtrs(zza, zzc, _seqIPtr, _seqILength);
78 for (j = jStart + 2 > i+1 ? jStart + 2 : i+1; j <= jEnd; ++j)
80 const vector<int>* _seqJPtr = alignPtr->getSequence(j);
81 int _seqJLength = alignPtr->getSeqLength(j);
84 makeNPtrs(zzb, zzd, _seqJPtr, _seqJLength);
88 makePPtrs(zzb, zzd, _seqJPtr, _seqJLength);
90 pairAlign(_seqIPtr, _seqILength, _seqJLength);
97 calcScore = (double)accum[0][maxSoFar];
98 if (userParameters->getPercent())
100 dsr = (_seqILength < _seqJLength) ? _seqILength
102 calcScore = (calcScore / (double)dsr) *100.0;
105 _score = (100.0 - calcScore) / 100.0;
106 distMat->SetAt(i, j, _score);
107 //distMat->SetAt(j, i, _score); /* distMat symmetric, FS, 2009-04-06 */
110 if(userParameters->getDisplayInfo())
114 utilityObject->info("Sequences (%d:%d) Aligned. Score: %lg",
119 utilityObject->info("Sequences (%d:%d) Not Aligned", i, j);
134 catch(const exception& e)
136 cerr << "An exception has occured in the FastPairwiseAlign class.\n"
144 * Note: There is a problem with the treatment of DNA/RNA.
145 * During file reading all residues are encoded as AminoAcids,
146 * even before it has been established if they are AA or not.
147 * This is bad and will have to be changed (later).
148 * 'A' is assigned code 0, C is assigned 2, G = 6, T=18, U=19.
149 * However, the fast alignment routines require that
150 * A=0, C=1, G=2, T=U=3. In the best case the results of the
151 * fast alignment (of DNA) will simply be meaningless, the
152 * worst case is a core dump.
153 * As a quick fix I implemented the following mask, that
154 * (for DNA/RNA) translates 0->0, 2->1, 6->2, 18->3, 19->3.
155 * This is awfull (it is not OO compliant) but it was quick,
156 * uses (much) less memory than a second DNA array, and is
157 * (much) faster than calling a translation function.
158 * Ideally this will be removed, but this requires changes
159 * to (i) the sequence encoding during file-reading AND
160 * (ii) all the DNA substitution matrices. (Fabian, 2009-02-25)
162 * A B C D E F G H I K L M 0 N P Q R S
163 * T U W X Y Z (goodmeasuregoodmeasuregood) */
164 int ziAA2DNA[] = {0,-1, 1,-1,-1,-1, 2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
165 3, 3,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1};
166 const int ciMaxResID = 3;
171 void FastPairwiseAlign::pairAlign(const vector<int>* seq, int l1, int l2)
173 int pot[8], i, j, l, m, limit, pos, tl1, vn1, vn2, flen, osptr, fs;
174 int tv1, tv2, encrypt, subt1, subt2, rmndr;
177 bool _DNAFlag = userParameters->getDNAFlag();
178 int _ktup = userParameters->getKtup();
179 int _maxAA = userParameters->getMaxAA();
180 int _windowGap = userParameters->getWindowGap();
181 int _window = userParameters->getWindow();
182 int _signif = userParameters->getSignif();
186 for (i = 1; i <= _ktup; ++i)
188 pot[i] = (int)pow((double)4, (double)(i - 1));
190 limit = (int)pow((double)4, (double)_ktup);
194 for (i = 1; i <= _ktup; i++)
196 pot[i] = (int)pow((double)(_maxAA + 1), (double)(i - 1));
198 limit = (int)pow((double)(_maxAA + 1), (double)_ktup);
202 for (i = 1; i <= tl1; ++i)
204 slopes[i] = displ[i] = 0;
209 // increment diagonal score for each k_tuple match
211 for (i = 1; i <= limit; ++i)
223 osptr = vn1 - vn2 + l2;
231 // choose the top SIGNIF diagonals
233 desQuickSort(displ, diagIndex, tl1);
235 j = tl1 - _signif + 1;
241 // flag all diagonals within WINDOW of a top diagonal
243 for (i = tl1; i >= j; i--)
247 l = (1 > pos - _window) ? 1 : pos - _window;
248 m = (tl1 < pos + _window) ? tl1 : pos + _window;
255 for (i = 1; i <= tl1; i++)
261 currFrag = maxSoFar = 0;
263 for (i = 1; i <= (l1 - _ktup + 1); ++i)
267 for (j = 1; j <= _ktup; ++j)
269 residue = ziAA2DNA[(*seq)[i + j - 1]];
270 if ((residue < 0) || (residue > ciMaxResID))
275 encrypt += ((residue) * pot[j]);
279 for (j = 1; j <= _ktup; ++j)
281 residue = (*seq)[i + j - 1];
282 if ((residue < 0) || (residue > _maxAA))
287 encrypt += ((residue) * pot[j]);
307 osptr = i - vn2 + l2;
308 if (slopes[osptr] != 1)
326 if (currFrag >= 2 * _maxAlnLength)
328 utilityObject->info("(Partial alignment)");
332 displ[osptr] = currFrag;
333 putFrag(fs, i, vn2, flen);
337 tv1 = accum[1][next];
338 tv2 = accum[2][next];
339 if (fragRelPos(i, vn2, tv1, tv2))
341 if (i - vn2 == accum[1][next] - accum[2][next])
343 if (i > accum[1][next] + (_ktup - 1))
345 fs = accum[0][next] + _ktup;
349 rmndr = i - accum[1][next];
350 fs = accum[0][next] + rmndr;
358 if (displ[osptr] == 0)
364 if (i > accum[1][displ[osptr]] + (_ktup - 1))
366 subt1 = accum[0][displ[osptr]] + _ktup;
370 rmndr = i - accum[1][displ[osptr]];
371 subt1 = accum[0][displ[osptr]] + rmndr;
374 subt2 = accum[0][next] - _windowGap + _ktup;
391 next = accum[4][next];
404 void FastPairwiseAlign::makePPtrs(vector<int>& tptr, vector<int>& pl, const vector<int>* seq, int length)
407 int i, j, limit, code;
410 int _ktup = userParameters->getKtup();
411 int _maxAA = userParameters->getMaxAA();
413 for (i = 1; i <= _ktup; i++)
415 a[i] = (int)pow((double)(_maxAA + 1), (double)(i - 1));
418 limit = (int)pow((double)(_maxAA + 1), (double)_ktup);
419 if(limit >= (int)pl.size())
421 pl.resize(limit + 1);
423 if(length >= (int)tptr.size())
425 tptr.resize(length + 1);
428 for (i = 1; i <= limit; ++i)
432 for (i = 1; i <= length; ++i) // NOTE changed this
437 for (i = 1; i <= (length - _ktup + 1); ++i)
441 for (j = 1; j <= _ktup; ++j)
443 residue = (*seq)[i + j - 1];
444 if ((residue < 0) || (residue > _maxAA))
449 code += ((residue) * a[j]);
460 pl[code] = i; //Ktuple code is at position i in sequence
464 void FastPairwiseAlign::makeNPtrs(vector<int>& tptr,vector<int>& pl, const vector<int>* seq, int length)
468 0, 1, 4, 16, 64, 256, 1024, 4096
470 int i, j, limit, code;
473 int _ktup = userParameters->getKtup();
475 limit = (int)pow((double)4, (double)_ktup);
477 if(limit >= (int)pl.size())
479 pl.resize(limit + 1);
481 if(length >= (int)tptr.size())
483 tptr.resize(length + 1);
486 for (i = 1; i <= limit; ++i)
490 for (i = 1; i <= length; ++i)
495 for (i = 1; i <= length - _ktup + 1; ++i)
499 for (j = 1; j <= _ktup; ++j)
501 residue = ziAA2DNA[(*seq)[i + j - 1]];
502 if ((residue < 0) || (residue > ciMaxResID))
507 code += ((residue) * pot[j]);
523 void FastPairwiseAlign::putFrag(int fs, int v1, int v2, int flen)
526 accum[0][currFrag] = fs;
527 accum[1][currFrag] = v1;
528 accum[2][currFrag] = v2;
529 accum[3][currFrag] = flen;
534 accum[4][currFrag] = 0;
538 if (fs >= accum[0][maxSoFar])
540 accum[4][currFrag] = maxSoFar;
550 next = accum[4][next];
551 if (fs >= accum[0][next])
556 accum[4][currFrag] = next;
557 accum[4][end] = currFrag;
561 inline int FastPairwiseAlign::fragRelPos(int a1, int b1, int a2, int b2)
564 int _ktup = userParameters->getKtup();
567 if (a1 - b1 == a2 - b2)
576 if (a2 + _ktup - 1 < a1 && b2 + _ktup - 1 < b1)
584 void FastPairwiseAlign::desQuickSort(vector<int>& array1, vector<int>& array2, int arraySize)
586 // Quicksort routine, adapted from chapter 4, page 115 of software tools
587 // by Kernighan and Plauger, (1986)
588 // Sort the elements of array1 and sort the
589 // elements of array2 accordingly
593 int lst[50], ust[50]; // the maximum no. of elements must be
594 // < log(base2) of 50
597 ust[1] = arraySize - 1;
602 if (lst[p] >= ust[p])
613 for (i = i + 1; array1[i] < pivlin; i++)
615 for (j = j - 1; j > i; j--)
616 if (array1[j] <= pivlin)
623 array1[i] = array1[j];
627 array2[i] = array2[j];
635 array1[i] = array1[j];
639 array2[i] = array2[j];
642 if (i - lst[p] < ust[p] - i)