Mac binaries
[jabaws.git] / website / archive / binaries / mac / src / clustalw / src / pairwise / FastPairwiseAlign.cpp
diff --git a/website/archive/binaries/mac/src/clustalw/src/pairwise/FastPairwiseAlign.cpp b/website/archive/binaries/mac/src/clustalw/src/pairwise/FastPairwiseAlign.cpp
new file mode 100644 (file)
index 0000000..9daa0e6
--- /dev/null
@@ -0,0 +1,661 @@
+/**
+ * Author: Mark Larkin
+ * 
+ * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson.  
+ */
+#ifdef HAVE_CONFIG_H
+    #include "config.h"
+#endif
+#include <math.h>
+#include "FastPairwiseAlign.h"
+
+namespace clustalw
+{
+
+FastPairwiseAlign::FastPairwiseAlign()
+{
+    _maxAlnLength = 0;
+}
+
+void FastPairwiseAlign::pairwiseAlign(Alignment *alignPtr, DistMatrix *distMat, int iStart, 
+                                      int iEnd, int jStart, int jEnd)
+{
+    try
+    {
+        if(distMat->getSize() != alignPtr->getNumSeqs() + 1)
+        {
+            cerr << "The distance matrix is not the right size!\n"
+                 << "Need to terminate program.\n";
+            exit(1);
+        }
+        if((iStart < 0) || (iEnd < iStart) || (jStart < 0) || (jEnd < jStart))
+        {
+            cout << "The range for pairwise Alignment is incorrect.\n"
+                 << "Need to terminate program.\n";
+            exit(1);
+        }
+    
+        int i, j, dsr;
+        double calcScore;
+        bool _DNAFlag = userParameters->getDNAFlag();
+        _maxAlnLength = alignPtr->getMaxAlnLength();
+        int num = (2 * _maxAlnLength) + 1;
+        accum.ResizeRect(5, num);
+    
+        displ.resize(num);
+        slopes.resize(num);
+        diagIndex.resize(num);
+
+        zza.resize(_maxAlnLength + 1);
+        zzb.resize(_maxAlnLength + 1);
+        zzc.resize(_maxAlnLength + 1);
+        zzd.resize(_maxAlnLength + 1);
+    
+        if (_DNAFlag)
+        {
+            userParameters->setDNAParams();
+        }
+        else
+        {
+            userParameters->setProtParams();
+        }
+
+        cout << "\n\n";
+    
+        for (i = iStart + 1; i <= iEnd; ++i)
+        {
+            const vector<int>* _seqIPtr = alignPtr->getSequence(i);
+            int _seqILength = alignPtr->getSeqLength(i);
+            if (_DNAFlag)
+            {
+                makeNPtrs(zza, zzc, _seqIPtr, _seqILength);
+            }
+            else
+            {
+                makePPtrs(zza, zzc, _seqIPtr, _seqILength);
+            }
+            double _score;
+           for (j = jStart + 2 > i+1 ? jStart + 2 : i+1; j <= jEnd; ++j)
+            {
+                const vector<int>* _seqJPtr = alignPtr->getSequence(j);
+                int _seqJLength = alignPtr->getSeqLength(j);
+                if (_DNAFlag)
+                {
+                    makeNPtrs(zzb, zzd, _seqJPtr, _seqJLength);
+                }
+                else
+                {
+                    makePPtrs(zzb, zzd, _seqJPtr, _seqJLength);
+                }
+                pairAlign(_seqIPtr, _seqILength, _seqJLength);
+                if (!maxSoFar)
+                {
+                    calcScore = 0.0;
+                }
+                else
+                {
+                    calcScore = (double)accum[0][maxSoFar];
+                    if (userParameters->getPercent())
+                    {
+                        dsr = (_seqILength < _seqJLength) ? _seqILength
+                            : _seqJLength;
+                        calcScore = (calcScore / (double)dsr) *100.0;
+                    }
+                }
+                _score = (100.0 - calcScore) / 100.0;
+                distMat->SetAt(i, j, _score);
+                //distMat->SetAt(j, i, _score); /* distMat symmetric, FS, 2009-04-06 */
+
+            
+                if(userParameters->getDisplayInfo())
+                {
+                    if (calcScore > 0.1)
+                    {
+                        utilityObject->info("Sequences (%d:%d) Aligned. Score: %lg",
+                                            i, j, calcScore);     
+                    }
+                    else
+                    {
+                        utilityObject->info("Sequences (%d:%d) Not Aligned", i, j);
+                    }
+                }
+            }
+        }
+        accum.clearArray();    
+        displ.clear();
+        slopes.clear();
+        diagIndex.clear();
+
+        zza.clear();
+        zzb.clear();
+        zzc.clear();
+        zzd.clear();
+    }
+    catch(const exception& e)
+    {
+        cerr << "An exception has occured in the FastPairwiseAlign class.\n"
+             << e.what() << "\n";
+        exit(1);    
+    }    
+}
+
+
+/*
+ * Note: There is a problem with the treatment of DNA/RNA. 
+ * During file reading all residues are encoded as AminoAcids, 
+ * even before it has been established if they are AA or not. 
+ * This is bad and will have to be changed (later). 
+ * 'A' is assigned code 0, C is assigned 2, G = 6, T=18, U=19. 
+ * However, the fast alignment routines require that 
+ * A=0, C=1, G=2, T=U=3. In the best case the results of the 
+ * fast alignment (of DNA) will simply be meaningless, the 
+ * worst case is a core dump. 
+ * As a quick fix I implemented the following mask, that 
+ * (for DNA/RNA) translates 0->0, 2->1, 6->2, 18->3, 19->3. 
+ * This is awfull (it is not OO compliant) but it was quick, 
+ * uses (much) less memory than a second DNA array, and is 
+ * (much) faster than calling a translation function. 
+ * Ideally this will be removed, but this requires changes 
+ * to (i) the sequence encoding during file-reading AND 
+ * (ii) all the DNA substitution matrices. (Fabian, 2009-02-25)
+ *
+ *                A  B  C  D  E  F  G  H  I  K  L  M  0  N  P  Q  R  S 
+ *                T  U  W  X  Y  Z (goodmeasuregoodmeasuregood) */
+int ziAA2DNA[] = {0,-1, 1,-1,-1,-1, 2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+                 3, 3,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1};
+const int ciMaxResID = 3;
+
+
+
+
+void FastPairwiseAlign::pairAlign(const vector<int>* seq, int l1, int l2)
+{
+    int pot[8], i, j, l, m, limit, pos, tl1, vn1, vn2, flen, osptr, fs;
+    int tv1, tv2, encrypt, subt1, subt2, rmndr;
+    bool flag;
+    int residue; 
+    bool _DNAFlag = userParameters->getDNAFlag();
+    int _ktup = userParameters->getKtup();
+    int _maxAA = userParameters->getMaxAA();
+    int _windowGap = userParameters->getWindowGap();
+    int _window = userParameters->getWindow();
+    int _signif = userParameters->getSignif();
+        
+    if (_DNAFlag)
+    {
+        for (i = 1; i <= _ktup; ++i)
+        {
+            pot[i] = (int)pow((double)4, (double)(i - 1));
+        }
+        limit = (int)pow((double)4, (double)_ktup);
+    }
+    else
+    {
+        for (i = 1; i <= _ktup; i++)
+        {
+            pot[i] = (int)pow((double)(_maxAA + 1), (double)(i - 1));
+        }
+        limit = (int)pow((double)(_maxAA + 1), (double)_ktup);
+    }
+    tl1 = (l1 + l2) - 1;
+
+    for (i = 1; i <= tl1; ++i)
+    {
+        slopes[i] = displ[i] = 0;
+        diagIndex[i] = i;
+    }
+
+
+    // increment diagonal score for each k_tuple match 
+
+    for (i = 1; i <= limit; ++i)
+    {
+        vn1 = zzc[i];
+        while (true)
+        {
+            if (!vn1)
+            {
+                break;
+            }
+            vn2 = zzd[i];
+            while (vn2 != 0)
+            {
+                osptr = vn1 - vn2 + l2;
+                ++displ[osptr];
+                vn2 = zzb[vn2];
+            }
+            vn1 = zza[vn1];
+        }
+    }
+
+    // choose the top SIGNIF diagonals 
+
+    desQuickSort(displ, diagIndex, tl1);
+
+    j = tl1 - _signif + 1;
+    if (j < 1)
+    {
+        j = 1;
+    }
+
+    // flag all diagonals within WINDOW of a top diagonal
+
+    for (i = tl1; i >= j; i--)
+    if (displ[i] > 0)
+    {
+        pos = diagIndex[i];
+        l = (1 > pos - _window) ? 1 : pos - _window;
+        m = (tl1 < pos + _window) ? tl1 : pos + _window;
+        for (; l <= m; l++)
+        {
+            slopes[l] = 1;
+        }
+    }
+
+    for (i = 1; i <= tl1; i++)
+    {
+        displ[i] = 0;
+    }
+
+
+    currFrag = maxSoFar = 0;
+
+    for (i = 1; i <= (l1 - _ktup + 1); ++i)
+    {
+        encrypt = flag = 0;
+       if (_DNAFlag){
+         for (j = 1; j <= _ktup; ++j)
+           {
+             residue = ziAA2DNA[(*seq)[i + j - 1]];
+             if ((residue < 0) || (residue > ciMaxResID))
+               {
+                 flag = true;
+                 break;
+               }
+             encrypt += ((residue) * pot[j]);
+           }
+       }
+       else {
+         for (j = 1; j <= _ktup; ++j)
+           {
+             residue = (*seq)[i + j - 1];
+             if ((residue < 0) || (residue > _maxAA))
+               {
+                 flag = true;
+                 break;
+               }
+             encrypt += ((residue) * pot[j]);
+           }
+       }
+
+        if (flag)
+        {
+            continue;
+        }
+        ++encrypt;
+
+        vn2 = zzd[encrypt];
+
+        flag = false;
+        while (true)
+        {
+            if (!vn2)
+            {
+                flag = true;
+                break;
+            }
+            osptr = i - vn2 + l2;
+            if (slopes[osptr] != 1)
+            {
+                vn2 = zzb[vn2];
+                continue;
+            }
+            flen = 0;
+            fs = _ktup;
+            next = maxSoFar;
+
+            
+            //  A-loop
+             
+
+            while (true)
+            {
+                if (!next)
+                {
+                    ++currFrag;
+                    if (currFrag >= 2 * _maxAlnLength)
+                    {
+                        utilityObject->info("(Partial alignment)");
+                        vatend = 1;
+                        return ;
+                    }
+                    displ[osptr] = currFrag;
+                    putFrag(fs, i, vn2, flen);
+                }
+                else
+                {
+                    tv1 = accum[1][next];
+                    tv2 = accum[2][next];
+                    if (fragRelPos(i, vn2, tv1, tv2))
+                    {
+                        if (i - vn2 == accum[1][next] - accum[2][next])
+                        {
+                            if (i > accum[1][next] + (_ktup - 1))
+                            {
+                                fs = accum[0][next] + _ktup;
+                            }
+                            else
+                            {
+                                rmndr = i - accum[1][next];
+                                fs = accum[0][next] + rmndr;
+                            }
+                            flen = next;
+                            next = 0;
+                            continue;
+                        }
+                        else
+                        {
+                            if (displ[osptr] == 0)
+                            {
+                                subt1 = _ktup;
+                            }
+                            else
+                            {
+                                if (i > accum[1][displ[osptr]] + (_ktup - 1))
+                                {
+                                    subt1 = accum[0][displ[osptr]] + _ktup;
+                                }
+                                else
+                                {
+                                    rmndr = i - accum[1][displ[osptr]];
+                                    subt1 = accum[0][displ[osptr]] + rmndr;
+                                }
+                            }
+                            subt2 = accum[0][next] - _windowGap + _ktup;
+                            if (subt2 > subt1)
+                            {
+                                flen = next;
+                                fs = subt2;
+                            }
+                            else
+                            {
+                                flen = displ[osptr];
+                                fs = subt1;
+                            }
+                            next = 0;
+                            continue;
+                        }
+                    }
+                    else
+                    {
+                        next = accum[4][next];
+                        continue;
+                    }
+                }
+                break;
+            }
+            // End of Aloop
+            vn2 = zzb[vn2];
+        }
+    }
+    vatend = 0;
+}
+
+void FastPairwiseAlign::makePPtrs(vector<int>& tptr, vector<int>& pl, const vector<int>* seq, int length)
+{
+    int a[10];
+    int i, j, limit, code;
+    bool flag;
+    int residue;
+    int _ktup = userParameters->getKtup();
+    int _maxAA = userParameters->getMaxAA();
+    
+    for (i = 1; i <= _ktup; i++)
+    {
+        a[i] = (int)pow((double)(_maxAA + 1), (double)(i - 1));
+    }
+
+    limit = (int)pow((double)(_maxAA + 1), (double)_ktup);
+    if(limit >= (int)pl.size())
+    {
+        pl.resize(limit + 1);
+    }
+    if(length >= (int)tptr.size())
+    {
+        tptr.resize(length + 1);
+    }
+    
+    for (i = 1; i <= limit; ++i)
+    {
+        pl[i] = 0;
+    }
+    for (i = 1; i <= length; ++i) // NOTE changed this
+    {
+        tptr[i] = 0;
+    }
+
+    for (i = 1; i <= (length - _ktup + 1); ++i)
+    {
+        code = 0;
+        flag = false;
+        for (j = 1; j <= _ktup; ++j)
+        {
+            residue = (*seq)[i + j - 1];
+            if ((residue < 0) || (residue > _maxAA))
+            {
+                flag = true;
+                break;
+            }
+            code += ((residue) * a[j]);
+        }
+        if (flag)
+        {
+            continue;
+        }
+        ++code;
+        if (pl[code] != 0)
+        {
+            tptr[i] = pl[code];
+        }
+        pl[code] = i; //Ktuple code is at position i in sequence
+    }
+}
+
+void FastPairwiseAlign::makeNPtrs(vector<int>& tptr,vector<int>& pl, const vector<int>* seq, int length)
+{
+    int pot[] =
+    {
+        0, 1, 4, 16, 64, 256, 1024, 4096
+    };
+    int i, j, limit, code;
+    bool flag;
+    int residue;
+    int _ktup = userParameters->getKtup();
+    
+    limit = (int)pow((double)4, (double)_ktup);
+
+    if(limit >= (int)pl.size())
+    {
+        pl.resize(limit + 1);
+    }
+    if(length >= (int)tptr.size())
+    {
+        tptr.resize(length + 1);
+    }
+    
+    for (i = 1; i <= limit; ++i)
+    {
+        pl[i] = 0;
+    }
+    for (i = 1; i <= length; ++i)
+    {
+        tptr[i] = 0;
+    }
+
+    for (i = 1; i <= length - _ktup + 1; ++i)
+    {
+        code = 0;
+        flag = false;
+       for (j = 1; j <= _ktup; ++j)
+         {
+           residue = ziAA2DNA[(*seq)[i + j - 1]];
+           if ((residue < 0) || (residue > ciMaxResID))
+             {
+               flag = true;
+               break;
+             }
+           code += ((residue) * pot[j]);
+         }
+        if (flag)
+        {
+            continue;
+        }
+        ++code;
+        if (pl[code] != 0)
+        {
+            tptr[i] = pl[code];
+        }
+        pl[code] = i;
+    }
+
+}
+
+void FastPairwiseAlign::putFrag(int fs, int v1, int v2, int flen)
+{
+    int end;
+    accum[0][currFrag] = fs;
+    accum[1][currFrag] = v1;
+    accum[2][currFrag] = v2;
+    accum[3][currFrag] = flen;
+
+    if (!maxSoFar)
+    {
+        maxSoFar = 1;
+        accum[4][currFrag] = 0;
+        return ;
+    }
+
+    if (fs >= accum[0][maxSoFar])
+    {
+        accum[4][currFrag] = maxSoFar;
+        maxSoFar = currFrag;
+        return ;
+    }
+    else
+    {
+        next = maxSoFar;
+        while (true)
+        {
+            end = next;
+            next = accum[4][next];
+            if (fs >= accum[0][next])
+            {
+                break;
+            }
+        }
+        accum[4][currFrag] = next;
+        accum[4][end] = currFrag;
+    }
+}
+
+inline int FastPairwiseAlign::fragRelPos(int a1, int b1, int a2, int b2)
+{
+    int ret;
+    int _ktup = userParameters->getKtup();
+    
+    ret = false;
+    if (a1 - b1 == a2 - b2)
+    {
+        if (a2 < a1)
+        {
+            ret = true;
+        }
+    }
+    else
+    {
+        if (a2 + _ktup - 1 < a1 && b2 + _ktup - 1 < b1)
+        {
+            ret = true;
+        }
+    }
+    return ret;
+}
+
+void FastPairwiseAlign::desQuickSort(vector<int>& array1, vector<int>& array2, int arraySize)
+{
+    // Quicksort routine, adapted from chapter 4, page 115 of software tools 
+    // by Kernighan and Plauger, (1986) 
+    // Sort the elements of array1 and sort the 
+    // elements of array2 accordingly
+    int temp1, temp2;
+    int p, pivlin;
+    int i, j;
+    int lst[50], ust[50]; // the maximum no. of elements must be
+                            // < log(base2) of 50 
+
+    lst[1] = 1;
+    ust[1] = arraySize - 1;
+    p = 1;
+
+    while (p > 0)
+    {
+        if (lst[p] >= ust[p])
+        {
+            p--;
+        }
+        else
+        {
+            i = lst[p] - 1;
+            j = ust[p];
+            pivlin = array1[j];
+            while (i < j)
+            {
+                for (i = i + 1; array1[i] < pivlin; i++)
+                    ;
+                for (j = j - 1; j > i; j--)
+                    if (array1[j] <= pivlin)
+                    {
+                        break;
+                    }
+                if (i < j)
+                {
+                    temp1 = array1[i];
+                    array1[i] = array1[j];
+                    array1[j] = temp1;
+
+                    temp2 = array2[i];
+                    array2[i] = array2[j];
+                    array2[j] = temp2;
+                }
+            }
+
+            j = ust[p];
+
+            temp1 = array1[i];
+            array1[i] = array1[j];
+            array1[j] = temp1;
+
+            temp2 = array2[i];
+            array2[i] = array2[j];
+            array2[j] = temp2;
+
+            if (i - lst[p] < ust[p] - i)
+            {
+                lst[p + 1] = lst[p];
+                ust[p + 1] = i - 1;
+                lst[p] = i + 1;
+            }
+            else
+            {
+                lst[p + 1] = i + 1;
+                ust[p + 1] = ust[p];
+                ust[p] = i - 1;
+            }
+            p = p + 1;
+        }
+    }
+    return ;
+
+}
+
+}