+++ /dev/null
-/**
- * Author: Mark Larkin
- *
- * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson.
- */
-#ifdef HAVE_CONFIG_H
- #include "config.h"
-#endif
-#include "FullPairwiseAlign.h"
-#include <math.h>
-
-namespace clustalw
-{
-
-FullPairwiseAlign::FullPairwiseAlign()
-: _maxAlnLength(0),
- intScale(0),
- mmScore(0),
- printPtr(0),
- lastPrint(0),
- _gapOpen(0),
- _gapExtend(0),
- seq1(0),
- seq2(0),
- maxScore(0),
- sb1(0),
- sb2(0),
- se1(0),
- se2(0)
-{
-
-}
-
-void FullPairwiseAlign::pairwiseAlign(Alignment *alignPtr, DistMatrix *distMat, int iStart, int iEnd, int jStart, int jEnd)
-{
- int si, sj, i;
- int n, m, len1, len2;
- int maxRes;
- int _matAvgScore;
- int res;
- double _score;
- float gapOpenScale, gapExtendScale;
-
- try
- {
-
- if(distMat->getSize() != alignPtr->getNumSeqs() + 1)
- {
- cerr << "The distance matrix is not the right size!\n"
- << "Need to terminate program.\n";
- exit(1);
- }
- if((iStart < 0) || (iEnd < iStart) || (jStart < 0) || (jEnd < jStart))
- {
- cerr << "The range for pairwise Alignment is incorrect.\n"
- << "Need to terminate program.\n";
- exit(1);
- }
-
- _maxAlnLength = alignPtr->getMaxAlnLength();
-
- int _numSeqs = alignPtr->getNumSeqs();
- if(_numSeqs == 0)
- {
- return;
- }
-
- int num = (2 * _maxAlnLength) + 1;
- bool _DNAFlag = userParameters->getDNAFlag();
- float _pwGapOpen, _pwGapExtend;
- _pwGapOpen = userParameters->getPWGapOpen();
- _pwGapExtend = userParameters->getPWGapExtend();
-
- displ.resize(num);
- HH.resize(_maxAlnLength);
- DD.resize(_maxAlnLength);
- RR.resize(_maxAlnLength);
- SS.resize(_maxAlnLength);
- // Note these 2 lines replace the stuff above because it is all done in the SubMatrix
- PairScaleValues scaleValues;
- maxRes = subMatrix->getPairwiseMatrix(matrix, scaleValues, _matAvgScore);
- if (maxRes == 0)
- {
- cerr << "Could not get the substitution matrix\n";
- return;
- }
-
- intScale = scaleValues.intScale;
- gapOpenScale = scaleValues.gapOpenScale;
- gapExtendScale = scaleValues.gapExtendScale;
-
- int _gapPos1, _gapPos2;
- _gapPos1 = userParameters->getGapPos1();
- _gapPos2 = userParameters->getGapPos2();
- const SeqArray* _ptrToSeqArray = alignPtr->getSeqArray(); //This is faster!
-
- for (si = utilityObject->MAX(0, iStart); si < _numSeqs && si < iEnd; si++)
- {
- n = alignPtr->getSeqLength(si + 1);
- len1 = 0;
- for (i = 1; i <= n; i++)
- {
- res = (*_ptrToSeqArray)[si + 1][i];
- if ((res != _gapPos1) && (res != _gapPos2))
- {
- len1++;
- }
- }
-
- for (sj = utilityObject->MAX(si+1, jStart+1); sj < _numSeqs && sj < jEnd; sj++)
- {
- m = alignPtr->getSeqLength(sj + 1);
- if (n == 0 || m == 0)
- {
- distMat->SetAt(si + 1, sj + 1, 1.0);
- distMat->SetAt(sj + 1, si + 1, 1.0);
- continue;
- }
- len2 = 0;
- for (i = 1; i <= m; i++)
- {
- res = (*_ptrToSeqArray)[sj + 1][i];
- if ((res != _gapPos1) && (res != _gapPos2))
- {
- len2++;
- }
- }
-
- if (_DNAFlag)
- {
- _gapOpen = static_cast<int>(2 * _pwGapOpen * intScale *
- gapOpenScale);
- _gapExtend = static_cast<int>(_pwGapExtend * intScale * gapExtendScale);
- }
- else
- {
- if (_matAvgScore <= 0)
- {
- _gapOpen = 2 * static_cast<int>((_pwGapOpen +
- log(static_cast<double>(utilityObject->MIN(n, m)))) * intScale);
- }
- else
- {
- _gapOpen = static_cast<int>(2 * _matAvgScore * (_pwGapOpen +
- log(static_cast<double>(utilityObject->MIN(n, m)))) * gapOpenScale);
- }
- _gapExtend = static_cast<int>(_pwGapExtend * intScale);
- }
- // align the sequences
-
- seq1 = si + 1;
- seq2 = sj + 1;
-
- _ptrToSeq1 = alignPtr->getSequence(seq1);
- _ptrToSeq2 = alignPtr->getSequence(seq2);
-
- forwardPass(_ptrToSeq1, _ptrToSeq2, n, m);
- reversePass(_ptrToSeq1, _ptrToSeq2);
-
- lastPrint = 0;
- printPtr = 1;
-
- // use Myers and Miller to align two sequences
-
- maxScore = diff(sb1 - 1, sb2 - 1, se1 - sb1 + 1, se2 - sb2 + 1,
- (int)0, (int)0);
-
- // calculate percentage residue identity
-
- mmScore = tracePath(sb1, sb2);
-
- if (len1 == 0 || len2 == 0)
- {
- mmScore = 0;
- }
- else
- {
- mmScore /= (float)utilityObject->MIN(len1, len2);
- }
-
- _score = ((float)100.0 - mmScore) / (float)100.0;
- distMat->SetAt(si + 1, sj + 1, _score);
- distMat->SetAt(sj + 1, si + 1, _score);
-
- if(userParameters->getDisplayInfo())
- {
- utilityObject->info("Sequences (%d:%d) Aligned. Score: %d",
- si+1, sj+1, (int)mmScore);
- }
- }
- }
-
- displ.clear();
- HH.clear();
- DD.clear();
- RR.clear();
- SS.clear();
- }
- catch(const exception& e)
- {
- cerr << "An exception has occured in the FullPairwiseAlign class.\n"
- << e.what() << "\n";
- exit(1);
- }
-}
-
-void FullPairwiseAlign::add(int v)
-{
- if (lastPrint < 0)
- {
- displ[printPtr - 1] = v;
- displ[printPtr++] = lastPrint;
- }
- else
- {
- lastPrint = displ[printPtr++] = v;
- }
-}
-
-inline int FullPairwiseAlign::calcScore(int iat, int jat, int v1, int v2)
-{
- return matrix[(*_ptrToSeq1)[v1 + iat]][(*_ptrToSeq2)[v2 + jat]];
-}
-
-float FullPairwiseAlign::tracePath(int tsb1, int tsb2)
-{
- int res1, res2;
- int i1, i2;
- int i, k, pos, toDo;
- int count;
- float score;
-
- toDo = printPtr - 1;
- i1 = tsb1;
- i2 = tsb2;
-
- pos = 0;
- count = 0;
- for (i = 1; i <= toDo; ++i)
- {
- if (displ[i] == 0)
- {
- res1 = (*_ptrToSeq1)[i1];
- res2 = (*_ptrToSeq2)[i2];
-
- if ((res1 != userParameters->getGapPos1()) &&
- (res2 != userParameters->getGapPos2()) && (res1 == res2))
- {
- count++;
- }
- ++i1;
- ++i2;
- ++pos;
- }
- else
- {
- if ((k = displ[i]) > 0)
- {
- i2 += k;
- pos += k;
- }
- else
- {
- i1 -= k;
- pos -= k;
- }
- }
- }
-
- score = 100.0 *(float)count;
- return (score);
-}
-
-void FullPairwiseAlign::forwardPass(const vector<int>* seq1, const vector<int>* seq2, int n, int m)
-{
- int i, j;
- int f, hh, p, t;
-
- maxScore = 0;
- se1 = se2 = 0;
- for (i = 0; i <= m; i++)
- {
- HH[i] = 0;
- DD[i] = -_gapOpen;
- }
-
- for (i = 1; i <= n; i++)
- {
- hh = p = 0;
- f = -_gapOpen;
-
- for (j = 1; j <= m; j++)
- {
-
- f -= _gapExtend;
- t = hh - _gapOpen - _gapExtend;
- if (f < t)
- {
- f = t;
- }
-
- DD[j] -= _gapExtend;
- t = HH[j] - _gapOpen - _gapExtend;
- if (DD[j] < t)
- {
- DD[j] = t;
- }
-
- hh = p + matrix[(*seq1)[i]][(*seq2)[j]];
- if (hh < f)
- {
- hh = f;
- }
- if (hh < DD[j])
- {
- hh = DD[j];
- }
- if (hh < 0)
- {
- hh = 0;
- }
-
- p = HH[j];
- HH[j] = hh;
-
- if (hh > maxScore)
- {
- maxScore = hh;
- se1 = i;
- se2 = j;
- }
- }
- }
-
-}
-
-void FullPairwiseAlign::reversePass(const vector<int>* seq1, const vector<int>* seq2)
-{
- int i, j;
- int f, hh, p, t;
- int cost;
-
- cost = 0;
- sb1 = sb2 = 1;
- for (i = se2; i > 0; i--)
- {
- HH[i] = - 1;
- DD[i] = - 1;
- }
-
- for (i = se1; i > 0; i--)
- {
- hh = f = - 1;
- if (i == se1)
- {
- p = 0;
- }
- else
- {
- p = - 1;
- }
-
- for (j = se2; j > 0; j--)
- {
-
- f -= _gapExtend;
- t = hh - _gapOpen - _gapExtend;
- if (f < t)
- {
- f = t;
- }
-
- DD[j] -= _gapExtend;
- t = HH[j] - _gapOpen - _gapExtend;
- if (DD[j] < t)
- {
- DD[j] = t;
- }
-
- hh = p + matrix[(*seq1)[i]][(*seq2)[j]];
- if (hh < f)
- {
- hh = f;
- }
- if (hh < DD[j])
- {
- hh = DD[j];
- }
-
- p = HH[j];
- HH[j] = hh;
-
- if (hh > cost)
- {
- cost = hh;
- sb1 = i;
- sb2 = j;
- if (cost >= maxScore)
- {
- break;
- }
- }
- }
- if (cost >= maxScore)
- {
- break;
- }
- }
-
-}
-
-int FullPairwiseAlign::diff(int A, int B, int M, int N, int tb, int te)
-{
- int type;
- int midi, midj, i, j;
- int midh;
- static int f, hh, e, s, t;
-
- if (N <= 0)
- {
- if (M > 0)
- {
- del(M);
- }
-
- return ( -(int)tbgap(M, tb));
- }
-
- if (M <= 1)
- {
- if (M <= 0)
- {
- add(N);
- return ( -(int)tbgap(N, tb));
- }
-
- midh = - (tb + _gapExtend) - tegap(N, te);
- hh = - (te + _gapExtend) - tbgap(N, tb);
- if (hh > midh)
- {
- midh = hh;
- }
- midj = 0;
- for (j = 1; j <= N; j++)
- {
- hh = calcScore(1, j, A, B) - tegap(N - j, te) - tbgap(j - 1, tb);
- if (hh > midh)
- {
- midh = hh;
- midj = j;
- }
- }
-
- if (midj == 0)
- {
- del(1);
- add(N);
- }
- else
- {
- if (midj > 1)
- {
- add(midj - 1);
- }
- displ[printPtr++] = lastPrint = 0;
- if (midj < N)
- {
- add(N - midj);
- }
- }
- return midh;
- }
-
- // Divide: Find optimum midpoint (midi,midj) of cost midh
-
- midi = M / 2;
- HH[0] = 0;
- t = - tb;
- for (j = 1; j <= N; j++)
- {
- HH[j] = t = t - _gapExtend;
- DD[j] = t - _gapOpen;
- }
-
- t = - tb;
- for (i = 1; i <= midi; i++)
- {
- s = HH[0];
- HH[0] = hh = t = t - _gapExtend;
- f = t - _gapOpen;
- for (j = 1; j <= N; j++)
- {
- if ((hh = hh - _gapOpen - _gapExtend) > (f = f - _gapExtend))
- {
- f = hh;
- }
- if ((hh = HH[j] - _gapOpen - _gapExtend) > (e = DD[j] - _gapExtend))
- {
- e = hh;
- }
- hh = s + calcScore(i, j, A, B);
- if (f > hh)
- {
- hh = f;
- }
- if (e > hh)
- {
- hh = e;
- }
-
- s = HH[j];
- HH[j] = hh;
- DD[j] = e;
- }
- }
-
- DD[0] = HH[0];
-
- RR[N] = 0;
- t = - te;
- for (j = N - 1; j >= 0; j--)
- {
- RR[j] = t = t - _gapExtend;
- SS[j] = t - _gapOpen;
- }
-
- t = - te;
- for (i = M - 1; i >= midi; i--)
- {
- s = RR[N];
- RR[N] = hh = t = t - _gapExtend;
- f = t - _gapOpen;
-
- for (j = N - 1; j >= 0; j--)
- {
-
- if ((hh = hh - _gapOpen - _gapExtend) > (f = f - _gapExtend))
- {
- f = hh;
- }
- if ((hh = RR[j] - _gapOpen - _gapExtend) > (e = SS[j] - _gapExtend))
- {
- e = hh;
- }
- hh = s + calcScore(i + 1, j + 1, A, B);
- if (f > hh)
- {
- hh = f;
- }
- if (e > hh)
- {
- hh = e;
- }
-
- s = RR[j];
- RR[j] = hh;
- SS[j] = e;
-
- }
- }
-
- SS[N] = RR[N];
-
- midh = HH[0] + RR[0];
- midj = 0;
- type = 1;
- for (j = 0; j <= N; j++)
- {
- hh = HH[j] + RR[j];
- if (hh >= midh)
- if (hh > midh || (HH[j] != DD[j] && RR[j] == SS[j]))
- {
- midh = hh;
- midj = j;
- }
- }
-
- for (j = N; j >= 0; j--)
- {
- hh = DD[j] + SS[j] + _gapOpen;
- if (hh > midh)
- {
- midh = hh;
- midj = j;
- type = 2;
- }
- }
-
- // Conquer recursively around midpoint
-
-
- if (type == 1)
- {
- // Type 1 gaps
- diff(A, B, midi, midj, tb, _gapOpen);
- diff(A + midi, B + midj, M - midi, N - midj, _gapOpen, te);
- }
- else
- {
- diff(A, B, midi - 1, midj, tb, 0);
- del(2);
- diff(A + midi + 1, B + midj, M - midi - 1, N - midj, 0, te);
- }
-
- return midh; // Return the score of the best alignment
-}
-
-void FullPairwiseAlign::del(int k)
-{
- if (lastPrint < 0)
- {
- lastPrint = displ[printPtr - 1] -= k;
- }
- else
- {
- lastPrint = displ[printPtr++] = - (k);
- }
-}
-
-int FullPairwiseAlign::gap(int k)
-{
- if(k <= 0)
- {
- return 0;
- }
- else
- {
- return _gapOpen + _gapExtend * k;
- }
-}
-
-int FullPairwiseAlign::tbgap(int k, int tb)
-{
- if(k <= 0)
- {
- return 0;
- }
- else
- {
- return tb + _gapExtend * k;
- }
-}
-
-int FullPairwiseAlign::tegap(int k, int te)
-{
- if(k <= 0)
- {
- return 0;
- }
- else
- {
- return te + _gapExtend * k;
- }
-}
-
-}