4 * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson.
9 #include "FullPairwiseAlign.h"
15 FullPairwiseAlign::FullPairwiseAlign()
34 void FullPairwiseAlign::pairwiseAlign(Alignment *alignPtr, DistMatrix *distMat, int iStart, int iEnd, int jStart, int jEnd)
42 float gapOpenScale, gapExtendScale;
47 if(distMat->getSize() != alignPtr->getNumSeqs() + 1)
49 cerr << "The distance matrix is not the right size!\n"
50 << "Need to terminate program.\n";
53 if((iStart < 0) || (iEnd < iStart) || (jStart < 0) || (jEnd < jStart))
55 cerr << "The range for pairwise Alignment is incorrect.\n"
56 << "Need to terminate program.\n";
60 _maxAlnLength = alignPtr->getMaxAlnLength();
62 int _numSeqs = alignPtr->getNumSeqs();
68 int num = (2 * _maxAlnLength) + 1;
69 bool _DNAFlag = userParameters->getDNAFlag();
70 float _pwGapOpen, _pwGapExtend;
71 _pwGapOpen = userParameters->getPWGapOpen();
72 _pwGapExtend = userParameters->getPWGapExtend();
75 HH.resize(_maxAlnLength);
76 DD.resize(_maxAlnLength);
77 RR.resize(_maxAlnLength);
78 SS.resize(_maxAlnLength);
79 // Note these 2 lines replace the stuff above because it is all done in the SubMatrix
80 PairScaleValues scaleValues;
81 maxRes = subMatrix->getPairwiseMatrix(matrix, scaleValues, _matAvgScore);
84 cerr << "Could not get the substitution matrix\n";
88 intScale = scaleValues.intScale;
89 gapOpenScale = scaleValues.gapOpenScale;
90 gapExtendScale = scaleValues.gapExtendScale;
92 int _gapPos1, _gapPos2;
93 _gapPos1 = userParameters->getGapPos1();
94 _gapPos2 = userParameters->getGapPos2();
95 const SeqArray* _ptrToSeqArray = alignPtr->getSeqArray(); //This is faster!
97 for (si = utilityObject->MAX(0, iStart); si < _numSeqs && si < iEnd; si++)
99 n = alignPtr->getSeqLength(si + 1);
101 for (i = 1; i <= n; i++)
103 res = (*_ptrToSeqArray)[si + 1][i];
104 if ((res != _gapPos1) && (res != _gapPos2))
110 for (sj = utilityObject->MAX(si+1, jStart+1); sj < _numSeqs && sj < jEnd; sj++)
112 m = alignPtr->getSeqLength(sj + 1);
113 if (n == 0 || m == 0)
115 distMat->SetAt(si + 1, sj + 1, 1.0);
116 distMat->SetAt(sj + 1, si + 1, 1.0);
120 for (i = 1; i <= m; i++)
122 res = (*_ptrToSeqArray)[sj + 1][i];
123 if ((res != _gapPos1) && (res != _gapPos2))
131 _gapOpen = static_cast<int>(2 * _pwGapOpen * intScale *
133 _gapExtend = static_cast<int>(_pwGapExtend * intScale * gapExtendScale);
137 if (_matAvgScore <= 0)
139 _gapOpen = 2 * static_cast<int>((_pwGapOpen +
140 log(static_cast<double>(utilityObject->MIN(n, m)))) * intScale);
144 _gapOpen = static_cast<int>(2 * _matAvgScore * (_pwGapOpen +
145 log(static_cast<double>(utilityObject->MIN(n, m)))) * gapOpenScale);
147 _gapExtend = static_cast<int>(_pwGapExtend * intScale);
149 // align the sequences
154 _ptrToSeq1 = alignPtr->getSequence(seq1);
155 _ptrToSeq2 = alignPtr->getSequence(seq2);
157 forwardPass(_ptrToSeq1, _ptrToSeq2, n, m);
158 reversePass(_ptrToSeq1, _ptrToSeq2);
163 // use Myers and Miller to align two sequences
165 maxScore = diff(sb1 - 1, sb2 - 1, se1 - sb1 + 1, se2 - sb2 + 1,
168 // calculate percentage residue identity
170 mmScore = tracePath(sb1, sb2);
172 if (len1 == 0 || len2 == 0)
178 mmScore /= (float)utilityObject->MIN(len1, len2);
181 _score = ((float)100.0 - mmScore) / (float)100.0;
182 distMat->SetAt(si + 1, sj + 1, _score);
183 distMat->SetAt(sj + 1, si + 1, _score);
185 if(userParameters->getDisplayInfo())
187 utilityObject->info("Sequences (%d:%d) Aligned. Score: %d",
188 si+1, sj+1, (int)mmScore);
199 catch(const exception& e)
201 cerr << "An exception has occured in the FullPairwiseAlign class.\n"
207 void FullPairwiseAlign::add(int v)
211 displ[printPtr - 1] = v;
212 displ[printPtr++] = lastPrint;
216 lastPrint = displ[printPtr++] = v;
220 inline int FullPairwiseAlign::calcScore(int iat, int jat, int v1, int v2)
222 return matrix[(*_ptrToSeq1)[v1 + iat]][(*_ptrToSeq2)[v2 + jat]];
225 float FullPairwiseAlign::tracePath(int tsb1, int tsb2)
239 for (i = 1; i <= toDo; ++i)
243 res1 = (*_ptrToSeq1)[i1];
244 res2 = (*_ptrToSeq2)[i2];
246 if ((res1 != userParameters->getGapPos1()) &&
247 (res2 != userParameters->getGapPos2()) && (res1 == res2))
257 if ((k = displ[i]) > 0)
270 score = 100.0 *(float)count;
274 void FullPairwiseAlign::forwardPass(const vector<int>* seq1, const vector<int>* seq2, int n, int m)
281 for (i = 0; i <= m; i++)
287 for (i = 1; i <= n; i++)
292 for (j = 1; j <= m; j++)
296 t = hh - _gapOpen - _gapExtend;
303 t = HH[j] - _gapOpen - _gapExtend;
309 hh = p + matrix[(*seq1)[i]][(*seq2)[j]];
337 void FullPairwiseAlign::reversePass(const vector<int>* seq1, const vector<int>* seq2)
345 for (i = se2; i > 0; i--)
351 for (i = se1; i > 0; i--)
363 for (j = se2; j > 0; j--)
367 t = hh - _gapOpen - _gapExtend;
374 t = HH[j] - _gapOpen - _gapExtend;
380 hh = p + matrix[(*seq1)[i]][(*seq2)[j]];
398 if (cost >= maxScore)
404 if (cost >= maxScore)
412 int FullPairwiseAlign::diff(int A, int B, int M, int N, int tb, int te)
415 int midi, midj, i, j;
417 static int f, hh, e, s, t;
426 return ( -(int)tbgap(M, tb));
434 return ( -(int)tbgap(N, tb));
437 midh = - (tb + _gapExtend) - tegap(N, te);
438 hh = - (te + _gapExtend) - tbgap(N, tb);
444 for (j = 1; j <= N; j++)
446 hh = calcScore(1, j, A, B) - tegap(N - j, te) - tbgap(j - 1, tb);
465 displ[printPtr++] = lastPrint = 0;
474 // Divide: Find optimum midpoint (midi,midj) of cost midh
479 for (j = 1; j <= N; j++)
481 HH[j] = t = t - _gapExtend;
482 DD[j] = t - _gapOpen;
486 for (i = 1; i <= midi; i++)
489 HH[0] = hh = t = t - _gapExtend;
491 for (j = 1; j <= N; j++)
493 if ((hh = hh - _gapOpen - _gapExtend) > (f = f - _gapExtend))
497 if ((hh = HH[j] - _gapOpen - _gapExtend) > (e = DD[j] - _gapExtend))
501 hh = s + calcScore(i, j, A, B);
521 for (j = N - 1; j >= 0; j--)
523 RR[j] = t = t - _gapExtend;
524 SS[j] = t - _gapOpen;
528 for (i = M - 1; i >= midi; i--)
531 RR[N] = hh = t = t - _gapExtend;
534 for (j = N - 1; j >= 0; j--)
537 if ((hh = hh - _gapOpen - _gapExtend) > (f = f - _gapExtend))
541 if ((hh = RR[j] - _gapOpen - _gapExtend) > (e = SS[j] - _gapExtend))
545 hh = s + calcScore(i + 1, j + 1, A, B);
564 midh = HH[0] + RR[0];
567 for (j = 0; j <= N; j++)
571 if (hh > midh || (HH[j] != DD[j] && RR[j] == SS[j]))
578 for (j = N; j >= 0; j--)
580 hh = DD[j] + SS[j] + _gapOpen;
589 // Conquer recursively around midpoint
595 diff(A, B, midi, midj, tb, _gapOpen);
596 diff(A + midi, B + midj, M - midi, N - midj, _gapOpen, te);
600 diff(A, B, midi - 1, midj, tb, 0);
602 diff(A + midi + 1, B + midj, M - midi - 1, N - midj, 0, te);
605 return midh; // Return the score of the best alignment
608 void FullPairwiseAlign::del(int k)
612 lastPrint = displ[printPtr - 1] -= k;
616 lastPrint = displ[printPtr++] = - (k);
620 int FullPairwiseAlign::gap(int k)
628 return _gapOpen + _gapExtend * k;
632 int FullPairwiseAlign::tbgap(int k, int tb)
640 return tb + _gapExtend * k;
644 int FullPairwiseAlign::tegap(int k, int te)
652 return te + _gapExtend * k;