+++ /dev/null
-/**
- * Author: Mark Larkin
- *
- * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson.
- */
-#ifdef HAVE_CONFIG_H
- #include "config.h"
-#endif
-#include <math.h>
-#include "FastPairwiseAlign.h"
-
-namespace clustalw
-{
-
-FastPairwiseAlign::FastPairwiseAlign()
-{
- _maxAlnLength = 0;
-}
-
-void FastPairwiseAlign::pairwiseAlign(Alignment *alignPtr, DistMatrix *distMat, int iStart,
- int iEnd, int jStart, int jEnd)
-{
- try
- {
- if(distMat->getSize() != alignPtr->getNumSeqs() + 1)
- {
- cerr << "The distance matrix is not the right size!\n"
- << "Need to terminate program.\n";
- exit(1);
- }
- if((iStart < 0) || (iEnd < iStart) || (jStart < 0) || (jEnd < jStart))
- {
- cout << "The range for pairwise Alignment is incorrect.\n"
- << "Need to terminate program.\n";
- exit(1);
- }
-
- int i, j, dsr;
- double calcScore;
- bool _DNAFlag = userParameters->getDNAFlag();
- _maxAlnLength = alignPtr->getMaxAlnLength();
- int num = (2 * _maxAlnLength) + 1;
- accum.ResizeRect(5, num);
-
- displ.resize(num);
- slopes.resize(num);
- diagIndex.resize(num);
-
- zza.resize(_maxAlnLength + 1);
- zzb.resize(_maxAlnLength + 1);
- zzc.resize(_maxAlnLength + 1);
- zzd.resize(_maxAlnLength + 1);
-
- if (_DNAFlag)
- {
- userParameters->setDNAParams();
- }
- else
- {
- userParameters->setProtParams();
- }
-
- cout << "\n\n";
-
- for (i = iStart + 1; i <= iEnd; ++i)
- {
- const vector<int>* _seqIPtr = alignPtr->getSequence(i);
- int _seqILength = alignPtr->getSeqLength(i);
- if (_DNAFlag)
- {
- makeNPtrs(zza, zzc, _seqIPtr, _seqILength);
- }
- else
- {
- makePPtrs(zza, zzc, _seqIPtr, _seqILength);
- }
- double _score;
- for (j = jStart + 2 > i+1 ? jStart + 2 : i+1; j <= jEnd; ++j)
- {
- const vector<int>* _seqJPtr = alignPtr->getSequence(j);
- int _seqJLength = alignPtr->getSeqLength(j);
- if (_DNAFlag)
- {
- makeNPtrs(zzb, zzd, _seqJPtr, _seqJLength);
- }
- else
- {
- makePPtrs(zzb, zzd, _seqJPtr, _seqJLength);
- }
- pairAlign(_seqIPtr, _seqILength, _seqJLength);
- if (!maxSoFar)
- {
- calcScore = 0.0;
- }
- else
- {
- calcScore = (double)accum[0][maxSoFar];
- if (userParameters->getPercent())
- {
- dsr = (_seqILength < _seqJLength) ? _seqILength
- : _seqJLength;
- calcScore = (calcScore / (double)dsr) *100.0;
- }
- }
- _score = (100.0 - calcScore) / 100.0;
- distMat->SetAt(i, j, _score);
- //distMat->SetAt(j, i, _score); /* distMat symmetric, FS, 2009-04-06 */
-
-
- if(userParameters->getDisplayInfo())
- {
- if (calcScore > 0.1)
- {
- utilityObject->info("Sequences (%d:%d) Aligned. Score: %lg",
- i, j, calcScore);
- }
- else
- {
- utilityObject->info("Sequences (%d:%d) Not Aligned", i, j);
- }
- }
- }
- }
- accum.clearArray();
- displ.clear();
- slopes.clear();
- diagIndex.clear();
-
- zza.clear();
- zzb.clear();
- zzc.clear();
- zzd.clear();
- }
- catch(const exception& e)
- {
- cerr << "An exception has occured in the FastPairwiseAlign class.\n"
- << e.what() << "\n";
- exit(1);
- }
-}
-
-
-/*
- * Note: There is a problem with the treatment of DNA/RNA.
- * During file reading all residues are encoded as AminoAcids,
- * even before it has been established if they are AA or not.
- * This is bad and will have to be changed (later).
- * 'A' is assigned code 0, C is assigned 2, G = 6, T=18, U=19.
- * However, the fast alignment routines require that
- * A=0, C=1, G=2, T=U=3. In the best case the results of the
- * fast alignment (of DNA) will simply be meaningless, the
- * worst case is a core dump.
- * As a quick fix I implemented the following mask, that
- * (for DNA/RNA) translates 0->0, 2->1, 6->2, 18->3, 19->3.
- * This is awfull (it is not OO compliant) but it was quick,
- * uses (much) less memory than a second DNA array, and is
- * (much) faster than calling a translation function.
- * Ideally this will be removed, but this requires changes
- * to (i) the sequence encoding during file-reading AND
- * (ii) all the DNA substitution matrices. (Fabian, 2009-02-25)
- *
- * A B C D E F G H I K L M 0 N P Q R S
- * T U W X Y Z (goodmeasuregoodmeasuregood) */
-int ziAA2DNA[] = {0,-1, 1,-1,-1,-1, 2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
- 3, 3,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1};
-const int ciMaxResID = 3;
-
-
-
-
-void FastPairwiseAlign::pairAlign(const vector<int>* seq, int l1, int l2)
-{
- int pot[8], i, j, l, m, limit, pos, tl1, vn1, vn2, flen, osptr, fs;
- int tv1, tv2, encrypt, subt1, subt2, rmndr;
- bool flag;
- int residue;
- bool _DNAFlag = userParameters->getDNAFlag();
- int _ktup = userParameters->getKtup();
- int _maxAA = userParameters->getMaxAA();
- int _windowGap = userParameters->getWindowGap();
- int _window = userParameters->getWindow();
- int _signif = userParameters->getSignif();
-
- if (_DNAFlag)
- {
- for (i = 1; i <= _ktup; ++i)
- {
- pot[i] = (int)pow((double)4, (double)(i - 1));
- }
- limit = (int)pow((double)4, (double)_ktup);
- }
- else
- {
- for (i = 1; i <= _ktup; i++)
- {
- pot[i] = (int)pow((double)(_maxAA + 1), (double)(i - 1));
- }
- limit = (int)pow((double)(_maxAA + 1), (double)_ktup);
- }
- tl1 = (l1 + l2) - 1;
-
- for (i = 1; i <= tl1; ++i)
- {
- slopes[i] = displ[i] = 0;
- diagIndex[i] = i;
- }
-
-
- // increment diagonal score for each k_tuple match
-
- for (i = 1; i <= limit; ++i)
- {
- vn1 = zzc[i];
- while (true)
- {
- if (!vn1)
- {
- break;
- }
- vn2 = zzd[i];
- while (vn2 != 0)
- {
- osptr = vn1 - vn2 + l2;
- ++displ[osptr];
- vn2 = zzb[vn2];
- }
- vn1 = zza[vn1];
- }
- }
-
- // choose the top SIGNIF diagonals
-
- desQuickSort(displ, diagIndex, tl1);
-
- j = tl1 - _signif + 1;
- if (j < 1)
- {
- j = 1;
- }
-
- // flag all diagonals within WINDOW of a top diagonal
-
- for (i = tl1; i >= j; i--)
- if (displ[i] > 0)
- {
- pos = diagIndex[i];
- l = (1 > pos - _window) ? 1 : pos - _window;
- m = (tl1 < pos + _window) ? tl1 : pos + _window;
- for (; l <= m; l++)
- {
- slopes[l] = 1;
- }
- }
-
- for (i = 1; i <= tl1; i++)
- {
- displ[i] = 0;
- }
-
-
- currFrag = maxSoFar = 0;
-
- for (i = 1; i <= (l1 - _ktup + 1); ++i)
- {
- encrypt = flag = 0;
- if (_DNAFlag){
- for (j = 1; j <= _ktup; ++j)
- {
- residue = ziAA2DNA[(*seq)[i + j - 1]];
- if ((residue < 0) || (residue > ciMaxResID))
- {
- flag = true;
- break;
- }
- encrypt += ((residue) * pot[j]);
- }
- }
- else {
- for (j = 1; j <= _ktup; ++j)
- {
- residue = (*seq)[i + j - 1];
- if ((residue < 0) || (residue > _maxAA))
- {
- flag = true;
- break;
- }
- encrypt += ((residue) * pot[j]);
- }
- }
-
- if (flag)
- {
- continue;
- }
- ++encrypt;
-
- vn2 = zzd[encrypt];
-
- flag = false;
- while (true)
- {
- if (!vn2)
- {
- flag = true;
- break;
- }
- osptr = i - vn2 + l2;
- if (slopes[osptr] != 1)
- {
- vn2 = zzb[vn2];
- continue;
- }
- flen = 0;
- fs = _ktup;
- next = maxSoFar;
-
-
- // A-loop
-
-
- while (true)
- {
- if (!next)
- {
- ++currFrag;
- if (currFrag >= 2 * _maxAlnLength)
- {
- utilityObject->info("(Partial alignment)");
- vatend = 1;
- return ;
- }
- displ[osptr] = currFrag;
- putFrag(fs, i, vn2, flen);
- }
- else
- {
- tv1 = accum[1][next];
- tv2 = accum[2][next];
- if (fragRelPos(i, vn2, tv1, tv2))
- {
- if (i - vn2 == accum[1][next] - accum[2][next])
- {
- if (i > accum[1][next] + (_ktup - 1))
- {
- fs = accum[0][next] + _ktup;
- }
- else
- {
- rmndr = i - accum[1][next];
- fs = accum[0][next] + rmndr;
- }
- flen = next;
- next = 0;
- continue;
- }
- else
- {
- if (displ[osptr] == 0)
- {
- subt1 = _ktup;
- }
- else
- {
- if (i > accum[1][displ[osptr]] + (_ktup - 1))
- {
- subt1 = accum[0][displ[osptr]] + _ktup;
- }
- else
- {
- rmndr = i - accum[1][displ[osptr]];
- subt1 = accum[0][displ[osptr]] + rmndr;
- }
- }
- subt2 = accum[0][next] - _windowGap + _ktup;
- if (subt2 > subt1)
- {
- flen = next;
- fs = subt2;
- }
- else
- {
- flen = displ[osptr];
- fs = subt1;
- }
- next = 0;
- continue;
- }
- }
- else
- {
- next = accum[4][next];
- continue;
- }
- }
- break;
- }
- // End of Aloop
- vn2 = zzb[vn2];
- }
- }
- vatend = 0;
-}
-
-void FastPairwiseAlign::makePPtrs(vector<int>& tptr, vector<int>& pl, const vector<int>* seq, int length)
-{
- int a[10];
- int i, j, limit, code;
- bool flag;
- int residue;
- int _ktup = userParameters->getKtup();
- int _maxAA = userParameters->getMaxAA();
-
- for (i = 1; i <= _ktup; i++)
- {
- a[i] = (int)pow((double)(_maxAA + 1), (double)(i - 1));
- }
-
- limit = (int)pow((double)(_maxAA + 1), (double)_ktup);
- if(limit >= (int)pl.size())
- {
- pl.resize(limit + 1);
- }
- if(length >= (int)tptr.size())
- {
- tptr.resize(length + 1);
- }
-
- for (i = 1; i <= limit; ++i)
- {
- pl[i] = 0;
- }
- for (i = 1; i <= length; ++i) // NOTE changed this
- {
- tptr[i] = 0;
- }
-
- for (i = 1; i <= (length - _ktup + 1); ++i)
- {
- code = 0;
- flag = false;
- for (j = 1; j <= _ktup; ++j)
- {
- residue = (*seq)[i + j - 1];
- if ((residue < 0) || (residue > _maxAA))
- {
- flag = true;
- break;
- }
- code += ((residue) * a[j]);
- }
- if (flag)
- {
- continue;
- }
- ++code;
- if (pl[code] != 0)
- {
- tptr[i] = pl[code];
- }
- pl[code] = i; //Ktuple code is at position i in sequence
- }
-}
-
-void FastPairwiseAlign::makeNPtrs(vector<int>& tptr,vector<int>& pl, const vector<int>* seq, int length)
-{
- int pot[] =
- {
- 0, 1, 4, 16, 64, 256, 1024, 4096
- };
- int i, j, limit, code;
- bool flag;
- int residue;
- int _ktup = userParameters->getKtup();
-
- limit = (int)pow((double)4, (double)_ktup);
-
- if(limit >= (int)pl.size())
- {
- pl.resize(limit + 1);
- }
- if(length >= (int)tptr.size())
- {
- tptr.resize(length + 1);
- }
-
- for (i = 1; i <= limit; ++i)
- {
- pl[i] = 0;
- }
- for (i = 1; i <= length; ++i)
- {
- tptr[i] = 0;
- }
-
- for (i = 1; i <= length - _ktup + 1; ++i)
- {
- code = 0;
- flag = false;
- for (j = 1; j <= _ktup; ++j)
- {
- residue = ziAA2DNA[(*seq)[i + j - 1]];
- if ((residue < 0) || (residue > ciMaxResID))
- {
- flag = true;
- break;
- }
- code += ((residue) * pot[j]);
- }
- if (flag)
- {
- continue;
- }
- ++code;
- if (pl[code] != 0)
- {
- tptr[i] = pl[code];
- }
- pl[code] = i;
- }
-
-}
-
-void FastPairwiseAlign::putFrag(int fs, int v1, int v2, int flen)
-{
- int end;
- accum[0][currFrag] = fs;
- accum[1][currFrag] = v1;
- accum[2][currFrag] = v2;
- accum[3][currFrag] = flen;
-
- if (!maxSoFar)
- {
- maxSoFar = 1;
- accum[4][currFrag] = 0;
- return ;
- }
-
- if (fs >= accum[0][maxSoFar])
- {
- accum[4][currFrag] = maxSoFar;
- maxSoFar = currFrag;
- return ;
- }
- else
- {
- next = maxSoFar;
- while (true)
- {
- end = next;
- next = accum[4][next];
- if (fs >= accum[0][next])
- {
- break;
- }
- }
- accum[4][currFrag] = next;
- accum[4][end] = currFrag;
- }
-}
-
-inline int FastPairwiseAlign::fragRelPos(int a1, int b1, int a2, int b2)
-{
- int ret;
- int _ktup = userParameters->getKtup();
-
- ret = false;
- if (a1 - b1 == a2 - b2)
- {
- if (a2 < a1)
- {
- ret = true;
- }
- }
- else
- {
- if (a2 + _ktup - 1 < a1 && b2 + _ktup - 1 < b1)
- {
- ret = true;
- }
- }
- return ret;
-}
-
-void FastPairwiseAlign::desQuickSort(vector<int>& array1, vector<int>& array2, int arraySize)
-{
- // Quicksort routine, adapted from chapter 4, page 115 of software tools
- // by Kernighan and Plauger, (1986)
- // Sort the elements of array1 and sort the
- // elements of array2 accordingly
- int temp1, temp2;
- int p, pivlin;
- int i, j;
- int lst[50], ust[50]; // the maximum no. of elements must be
- // < log(base2) of 50
-
- lst[1] = 1;
- ust[1] = arraySize - 1;
- p = 1;
-
- while (p > 0)
- {
- if (lst[p] >= ust[p])
- {
- p--;
- }
- else
- {
- i = lst[p] - 1;
- j = ust[p];
- pivlin = array1[j];
- while (i < j)
- {
- for (i = i + 1; array1[i] < pivlin; i++)
- ;
- for (j = j - 1; j > i; j--)
- if (array1[j] <= pivlin)
- {
- break;
- }
- if (i < j)
- {
- temp1 = array1[i];
- array1[i] = array1[j];
- array1[j] = temp1;
-
- temp2 = array2[i];
- array2[i] = array2[j];
- array2[j] = temp2;
- }
- }
-
- j = ust[p];
-
- temp1 = array1[i];
- array1[i] = array1[j];
- array1[j] = temp1;
-
- temp2 = array2[i];
- array2[i] = array2[j];
- array2[j] = temp2;
-
- if (i - lst[p] < ust[p] - i)
- {
- lst[p + 1] = lst[p];
- ust[p + 1] = i - 1;
- lst[p] = i + 1;
- }
- else
- {
- lst[p + 1] = i + 1;
- ust[p + 1] = ust[p];
- ust[p] = i - 1;
- }
- p = p + 1;
- }
- }
- return ;
-
-}
-
-}