/** * Author: Mark Larkin * * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson. */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include "FullPairwiseAlign.h" #include namespace clustalw { FullPairwiseAlign::FullPairwiseAlign() : _maxAlnLength(0), intScale(0), mmScore(0), printPtr(0), lastPrint(0), _gapOpen(0), _gapExtend(0), seq1(0), seq2(0), maxScore(0), sb1(0), sb2(0), se1(0), se2(0) { } void FullPairwiseAlign::pairwiseAlign(Alignment *alignPtr, DistMatrix *distMat, int iStart, int iEnd, int jStart, int jEnd) { int si, sj, i; int n, m, len1, len2; int maxRes; int _matAvgScore; int res; double _score; float gapOpenScale, gapExtendScale; try { if(distMat->getSize() != alignPtr->getNumSeqs() + 1) { cerr << "The distance matrix is not the right size!\n" << "Need to terminate program.\n"; exit(1); } if((iStart < 0) || (iEnd < iStart) || (jStart < 0) || (jEnd < jStart)) { cerr << "The range for pairwise Alignment is incorrect.\n" << "Need to terminate program.\n"; exit(1); } _maxAlnLength = alignPtr->getMaxAlnLength(); int _numSeqs = alignPtr->getNumSeqs(); if(_numSeqs == 0) { return; } int num = (2 * _maxAlnLength) + 1; bool _DNAFlag = userParameters->getDNAFlag(); float _pwGapOpen, _pwGapExtend; _pwGapOpen = userParameters->getPWGapOpen(); _pwGapExtend = userParameters->getPWGapExtend(); displ.resize(num); HH.resize(_maxAlnLength); DD.resize(_maxAlnLength); RR.resize(_maxAlnLength); SS.resize(_maxAlnLength); // Note these 2 lines replace the stuff above because it is all done in the SubMatrix PairScaleValues scaleValues; maxRes = subMatrix->getPairwiseMatrix(matrix, scaleValues, _matAvgScore); if (maxRes == 0) { cerr << "Could not get the substitution matrix\n"; return; } intScale = scaleValues.intScale; gapOpenScale = scaleValues.gapOpenScale; gapExtendScale = scaleValues.gapExtendScale; int _gapPos1, _gapPos2; _gapPos1 = userParameters->getGapPos1(); _gapPos2 = userParameters->getGapPos2(); const SeqArray* _ptrToSeqArray = alignPtr->getSeqArray(); //This is faster! for (si = utilityObject->MAX(0, iStart); si < _numSeqs && si < iEnd; si++) { n = alignPtr->getSeqLength(si + 1); len1 = 0; for (i = 1; i <= n; i++) { res = (*_ptrToSeqArray)[si + 1][i]; if ((res != _gapPos1) && (res != _gapPos2)) { len1++; } } for (sj = utilityObject->MAX(si+1, jStart+1); sj < _numSeqs && sj < jEnd; sj++) { m = alignPtr->getSeqLength(sj + 1); if (n == 0 || m == 0) { distMat->SetAt(si + 1, sj + 1, 1.0); distMat->SetAt(sj + 1, si + 1, 1.0); continue; } len2 = 0; for (i = 1; i <= m; i++) { res = (*_ptrToSeqArray)[sj + 1][i]; if ((res != _gapPos1) && (res != _gapPos2)) { len2++; } } if (_DNAFlag) { _gapOpen = static_cast(2 * _pwGapOpen * intScale * gapOpenScale); _gapExtend = static_cast(_pwGapExtend * intScale * gapExtendScale); } else { if (_matAvgScore <= 0) { _gapOpen = 2 * static_cast((_pwGapOpen + log(static_cast(utilityObject->MIN(n, m)))) * intScale); } else { _gapOpen = static_cast(2 * _matAvgScore * (_pwGapOpen + log(static_cast(utilityObject->MIN(n, m)))) * gapOpenScale); } _gapExtend = static_cast(_pwGapExtend * intScale); } // align the sequences seq1 = si + 1; seq2 = sj + 1; _ptrToSeq1 = alignPtr->getSequence(seq1); _ptrToSeq2 = alignPtr->getSequence(seq2); forwardPass(_ptrToSeq1, _ptrToSeq2, n, m); reversePass(_ptrToSeq1, _ptrToSeq2); lastPrint = 0; printPtr = 1; // use Myers and Miller to align two sequences maxScore = diff(sb1 - 1, sb2 - 1, se1 - sb1 + 1, se2 - sb2 + 1, (int)0, (int)0); // calculate percentage residue identity mmScore = tracePath(sb1, sb2); if (len1 == 0 || len2 == 0) { mmScore = 0; } else { mmScore /= (float)utilityObject->MIN(len1, len2); } _score = ((float)100.0 - mmScore) / (float)100.0; distMat->SetAt(si + 1, sj + 1, _score); distMat->SetAt(sj + 1, si + 1, _score); if(userParameters->getDisplayInfo()) { utilityObject->info("Sequences (%d:%d) Aligned. Score: %d", si+1, sj+1, (int)mmScore); } } } displ.clear(); HH.clear(); DD.clear(); RR.clear(); SS.clear(); } catch(const exception& e) { cerr << "An exception has occured in the FullPairwiseAlign class.\n" << e.what() << "\n"; exit(1); } } void FullPairwiseAlign::add(int v) { if (lastPrint < 0) { displ[printPtr - 1] = v; displ[printPtr++] = lastPrint; } else { lastPrint = displ[printPtr++] = v; } } inline int FullPairwiseAlign::calcScore(int iat, int jat, int v1, int v2) { return matrix[(*_ptrToSeq1)[v1 + iat]][(*_ptrToSeq2)[v2 + jat]]; } float FullPairwiseAlign::tracePath(int tsb1, int tsb2) { int res1, res2; int i1, i2; int i, k, pos, toDo; int count; float score; toDo = printPtr - 1; i1 = tsb1; i2 = tsb2; pos = 0; count = 0; for (i = 1; i <= toDo; ++i) { if (displ[i] == 0) { res1 = (*_ptrToSeq1)[i1]; res2 = (*_ptrToSeq2)[i2]; if ((res1 != userParameters->getGapPos1()) && (res2 != userParameters->getGapPos2()) && (res1 == res2)) { count++; } ++i1; ++i2; ++pos; } else { if ((k = displ[i]) > 0) { i2 += k; pos += k; } else { i1 -= k; pos -= k; } } } score = 100.0 *(float)count; return (score); } void FullPairwiseAlign::forwardPass(const vector* seq1, const vector* seq2, int n, int m) { int i, j; int f, hh, p, t; maxScore = 0; se1 = se2 = 0; for (i = 0; i <= m; i++) { HH[i] = 0; DD[i] = -_gapOpen; } for (i = 1; i <= n; i++) { hh = p = 0; f = -_gapOpen; for (j = 1; j <= m; j++) { f -= _gapExtend; t = hh - _gapOpen - _gapExtend; if (f < t) { f = t; } DD[j] -= _gapExtend; t = HH[j] - _gapOpen - _gapExtend; if (DD[j] < t) { DD[j] = t; } hh = p + matrix[(*seq1)[i]][(*seq2)[j]]; if (hh < f) { hh = f; } if (hh < DD[j]) { hh = DD[j]; } if (hh < 0) { hh = 0; } p = HH[j]; HH[j] = hh; if (hh > maxScore) { maxScore = hh; se1 = i; se2 = j; } } } } void FullPairwiseAlign::reversePass(const vector* seq1, const vector* seq2) { int i, j; int f, hh, p, t; int cost; cost = 0; sb1 = sb2 = 1; for (i = se2; i > 0; i--) { HH[i] = - 1; DD[i] = - 1; } for (i = se1; i > 0; i--) { hh = f = - 1; if (i == se1) { p = 0; } else { p = - 1; } for (j = se2; j > 0; j--) { f -= _gapExtend; t = hh - _gapOpen - _gapExtend; if (f < t) { f = t; } DD[j] -= _gapExtend; t = HH[j] - _gapOpen - _gapExtend; if (DD[j] < t) { DD[j] = t; } hh = p + matrix[(*seq1)[i]][(*seq2)[j]]; if (hh < f) { hh = f; } if (hh < DD[j]) { hh = DD[j]; } p = HH[j]; HH[j] = hh; if (hh > cost) { cost = hh; sb1 = i; sb2 = j; if (cost >= maxScore) { break; } } } if (cost >= maxScore) { break; } } } int FullPairwiseAlign::diff(int A, int B, int M, int N, int tb, int te) { int type; int midi, midj, i, j; int midh; static int f, hh, e, s, t; if (N <= 0) { if (M > 0) { del(M); } return ( -(int)tbgap(M, tb)); } if (M <= 1) { if (M <= 0) { add(N); return ( -(int)tbgap(N, tb)); } midh = - (tb + _gapExtend) - tegap(N, te); hh = - (te + _gapExtend) - tbgap(N, tb); if (hh > midh) { midh = hh; } midj = 0; for (j = 1; j <= N; j++) { hh = calcScore(1, j, A, B) - tegap(N - j, te) - tbgap(j - 1, tb); if (hh > midh) { midh = hh; midj = j; } } if (midj == 0) { del(1); add(N); } else { if (midj > 1) { add(midj - 1); } displ[printPtr++] = lastPrint = 0; if (midj < N) { add(N - midj); } } return midh; } // Divide: Find optimum midpoint (midi,midj) of cost midh midi = M / 2; HH[0] = 0; t = - tb; for (j = 1; j <= N; j++) { HH[j] = t = t - _gapExtend; DD[j] = t - _gapOpen; } t = - tb; for (i = 1; i <= midi; i++) { s = HH[0]; HH[0] = hh = t = t - _gapExtend; f = t - _gapOpen; for (j = 1; j <= N; j++) { if ((hh = hh - _gapOpen - _gapExtend) > (f = f - _gapExtend)) { f = hh; } if ((hh = HH[j] - _gapOpen - _gapExtend) > (e = DD[j] - _gapExtend)) { e = hh; } hh = s + calcScore(i, j, A, B); if (f > hh) { hh = f; } if (e > hh) { hh = e; } s = HH[j]; HH[j] = hh; DD[j] = e; } } DD[0] = HH[0]; RR[N] = 0; t = - te; for (j = N - 1; j >= 0; j--) { RR[j] = t = t - _gapExtend; SS[j] = t - _gapOpen; } t = - te; for (i = M - 1; i >= midi; i--) { s = RR[N]; RR[N] = hh = t = t - _gapExtend; f = t - _gapOpen; for (j = N - 1; j >= 0; j--) { if ((hh = hh - _gapOpen - _gapExtend) > (f = f - _gapExtend)) { f = hh; } if ((hh = RR[j] - _gapOpen - _gapExtend) > (e = SS[j] - _gapExtend)) { e = hh; } hh = s + calcScore(i + 1, j + 1, A, B); if (f > hh) { hh = f; } if (e > hh) { hh = e; } s = RR[j]; RR[j] = hh; SS[j] = e; } } SS[N] = RR[N]; midh = HH[0] + RR[0]; midj = 0; type = 1; for (j = 0; j <= N; j++) { hh = HH[j] + RR[j]; if (hh >= midh) if (hh > midh || (HH[j] != DD[j] && RR[j] == SS[j])) { midh = hh; midj = j; } } for (j = N; j >= 0; j--) { hh = DD[j] + SS[j] + _gapOpen; if (hh > midh) { midh = hh; midj = j; type = 2; } } // Conquer recursively around midpoint if (type == 1) { // Type 1 gaps diff(A, B, midi, midj, tb, _gapOpen); diff(A + midi, B + midj, M - midi, N - midj, _gapOpen, te); } else { diff(A, B, midi - 1, midj, tb, 0); del(2); diff(A + midi + 1, B + midj, M - midi - 1, N - midj, 0, te); } return midh; // Return the score of the best alignment } void FullPairwiseAlign::del(int k) { if (lastPrint < 0) { lastPrint = displ[printPtr - 1] -= k; } else { lastPrint = displ[printPtr++] = - (k); } } int FullPairwiseAlign::gap(int k) { if(k <= 0) { return 0; } else { return _gapOpen + _gapExtend * k; } } int FullPairwiseAlign::tbgap(int k, int tb) { if(k <= 0) { return 0; } else { return tb + _gapExtend * k; } } int FullPairwiseAlign::tegap(int k, int te) { if(k <= 0) { return 0; } else { return te + _gapExtend * k; } } }