Delete unneeded directory
[jabaws.git] / website / archive / binaries / mac / src / clustalw / src / multipleAlign / MSA.cpp
diff --git a/website/archive/binaries/mac/src/clustalw/src/multipleAlign/MSA.cpp b/website/archive/binaries/mac/src/clustalw/src/multipleAlign/MSA.cpp
deleted file mode 100644 (file)
index 2499039..0000000
+++ /dev/null
@@ -1,744 +0,0 @@
-/**
- * Author: Mark Larkin
- * 
- * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson.  
- */
-/**
- * @author Mark Larkin, Conway Institute, UCD. mark.larkin@ucd.ie
- * Changes:
- *
- * Mark: 23-01-2007: There was a problem with running an alignment with only 
- * one sequence in it. I needed to make a change to the multiSeqAlign function.
- ****************************************************************************/
-#ifdef HAVE_CONFIG_H
-    #include "config.h"
-#endif
-#include "MSA.h"
-#include "MyersMillerProfileAlign.h"
-//#include "../phylogeneticTree/ClusterTree.h"
-#include "../general/debuglogObject.h"
-
-namespace clustalw
-{
-
-/**
- * 
- * @param alnPtr 
- * @param distMat 
- * @param iStart 
- * @param phylipName 
- * @return 
- */
-int MSA::multiSeqAlign(Alignment* alnPtr, DistMatrix* distMat, vector<int>* seqWeight, AlignmentSteps* progSteps, int iStart)
-{
-        
-    if(!progSteps)
-    {
-        return 0;
-    }
-        
-    int* aligned;
-    vector<int> group;
-    int ix;
-    
-    int* maxid;
-    int max = 0, sum = 0;
-    vector<int> treeWeight;
-    int i = 0, j = 0, set = 0, iseq = 0;
-    int entries = 0;
-    int score = 0;
-    int _numSteps = 0;
-    utilityObject->info("Start of Multiple Alignment\n"); 
-
-    int _numSeqs = alnPtr->getNumSeqs();
-    
-    vector<int> newOutputIndex(_numSeqs);   
-   
-    alnPtr->addSeqWeight(seqWeight);
-    
-    ProfileAlignAlgorithm* alignAlgorithm = new MyersMillerProfileAlign;
-    _numSteps = progSteps->getNumSteps();
-    // for each sequence, find the most closely related sequence
-
-    maxid = new int[_numSeqs + 1];
-
-    for (i = 1; i <= _numSeqs; i++)
-    {
-        maxid[i] =  -1;
-        for (j = 1; j <= _numSeqs; j++)
-            if (j != i && maxid[i] < (*distMat)(i, j))
-            {
-                maxid[i] = static_cast<int>((*distMat)(i, j));
-            }
-    }
-
-    // group the sequences according to their relative divergence
-
-    if (iStart == 0)
-    {
-
-        // start the multiple alignments.........
-
-        utilityObject->info("Aligning...");
-        // first pass, align closely related sequences first....
-        
-        
-        ix = 0;
-        aligned = new int[_numSeqs + 1];
-        
-        for (i = 0; i <= _numSeqs; i++)
-        {
-            aligned[i] = 0;
-        }
-        
-        const vector<vector<int> >* ptrToSets = progSteps->getSteps();
-        
-        
-        for (set = 1; set <= _numSteps; ++set)
-        {
-            entries = 0;
-            for (i = 1; i <= _numSeqs; i++)
-            {
-                if (((*ptrToSets)[set][i] != 0) && 
-                    (maxid[i] > userParameters->getDivergenceCutoff()))
-                {
-                    entries++;
-                    if (aligned[i] == 0)
-                    {
-                        if (userParameters->getOutputOrder() == INPUT)
-                        {
-                            ++ix;
-                            newOutputIndex[i - 1] = i;
-                        }
-                        else
-                        {
-                            if(ix >= (int)newOutputIndex.size())
-                            {
-                                cerr << "ERROR: size = " << newOutputIndex.size() 
-                                     << "ix = " << ix << "\n";
-                                exit(1);
-                            }
-                            else
-                            {
-                                newOutputIndex[ix] = i;
-                                ++ix;
-                            }
-                        }
-
-                        aligned[i] = 1;
-                    }
-                }
-            }
-
-            if (entries > 0)
-            {
-                #if DEBUGFULL
-                    if(logObject && DEBUGLOG)
-                    {    
-                        logObject->logMsg("Doing profile align");
-                    }
-                #endif                 
-                score = alignAlgorithm->profileAlign(alnPtr, distMat, progSteps->getStep(set),
-                                                     aligned);
-            }
-            else
-            {
-                score = 0;
-            }
-
-
-            // negative score means fatal error... exit now! 
-
-            if (score < 0)
-            {
-                return (-1);
-            }
-            if(userParameters->getDisplayInfo())
-            {
-                if ((entries > 0) && (score > 0))
-                {
-                    utilityObject->info("Group %d: Sequences:%4d      Score:%d",
-                                        set, entries, score);                     
-                }
-                else
-                {
-                    utilityObject->info("Group %d:                     Delayed", set);
-                }
-            }
-        }
-    }
-
-    else
-    {
-        aligned = new int[_numSeqs + 1];
-        ix = 0;
-        for (i = 1; i <= iStart + 1; i++)
-        {
-            aligned[i] = 1;
-            ++ix;
-            newOutputIndex[i - 1] = i;
-        }
-        for (i = iStart + 2; i <= _numSeqs; i++)
-        {
-            aligned[i] = 0;
-        }
-    }
-
-    // second pass - align remaining, more divergent sequences..... 
-
-    // if not all sequences were aligned, for each unaligned sequence,
-    // find it's closest pair amongst the aligned sequences.
-
-    group.resize(_numSeqs + 1); 
-    treeWeight.resize(_numSeqs); 
-
-    for (i = 0; i < _numSeqs; i++)
-    {
-        treeWeight[i] = (*seqWeight)[i];
-    }
-
-    // if we haven't aligned any sequences, in the first pass - align the
-    // two most closely related sequences now
-    if (ix == 0)
-    {
-        max =  -1;
-        iseq = 0;
-
-        for (i = 1; i <= _numSeqs; i++)
-        {
-            for (j = i + 1; j <= _numSeqs; j++)
-            {
-                if (max < (*distMat)(i, j))
-                {
-                    max = static_cast<int>((*distMat)(i, j)); // Mark change 17-5-07
-                    iseq = i;
-                }
-            }
-        }
-        aligned[iseq] = 1;
-        if (userParameters->getOutputOrder() == INPUT)
-        {
-            ++ix;
-            newOutputIndex[iseq - 1] = iseq;
-        }
-        else
-        {
-            newOutputIndex[ix] = iseq;
-            ++ix;
-        }
-    }
-
-    while (ix < _numSeqs)
-    {
-        for (i = 1; i <= _numSeqs; i++)
-        {
-            if (aligned[i] == 0)
-            {
-                maxid[i] =  - 1;
-                for (j = 1; j <= _numSeqs; j++)
-                    if ((maxid[i] < (*distMat)(i, j)) && (aligned[j] != 0))
-                    {
-                        maxid[i] = static_cast<int>((*distMat)(i, j));// Mark change 17-5-07
-                    }
-
-            }
-        }
-        // find the most closely related sequence to those already aligned
-
-        max =  - 1;
-        iseq = 0;
-        for (i = 1; i <= _numSeqs; i++)
-        {
-            if ((aligned[i] == 0) && (maxid[i] > max))
-            {
-                max = maxid[i];
-                iseq = i;
-            }
-        }
-
-
-        // align this sequence to the existing alignment
-        // weight sequences with percent identity with profile
-        // OR...., multiply sequence weights from tree by percent identity with new sequence 
-        if (userParameters->getNoWeights() == false)
-        {
-            for (j = 0; j < _numSeqs; j++)
-                if (aligned[j + 1] != 0)
-                {
-                    (*seqWeight)[j] = static_cast<int>(treeWeight[j] * (*distMat)(j + 1, iseq));
-                }
-
-            // Normalise the weights, such that the sum of the weights = INT_SCALE_FACTOR
-
-            sum = 0;
-            for (j = 0; j < _numSeqs; j++)
-            {
-                if (aligned[j + 1] != 0)
-                {
-                    sum += (*seqWeight)[j];
-                }
-            }
-
-            if (sum == 0)
-            {
-                for (j = 0; j < _numSeqs; j++)
-                {
-                    (*seqWeight)[j] = 1;
-                }
-
-                sum = j;
-            }
-            for (j = 0; j < _numSeqs; j++)
-            {
-                if (aligned[j + 1] != 0)
-                {
-                    (*seqWeight)[j] = ((*seqWeight)[j] * INT_SCALE_FACTOR) / sum;
-                    if ((*seqWeight)[j] < 1)
-                    {
-                        (*seqWeight)[j] = 1;
-                    }
-                }
-            }
-        }
-
-        entries = 0;
-        for (j = 1; j <= _numSeqs; j++)
-        {
-            if (aligned[j] != 0)
-            {
-                group[j] = 1;
-                entries++;
-            }
-            else if (iseq == j)
-            {
-                group[j] = 2;
-                entries++;
-            }
-        }
-        
-        alnPtr->addSeqWeight(seqWeight);
-        aligned[iseq] = 1;
-        
-        score = alignAlgorithm->profileAlign(alnPtr, distMat, &group, aligned);
-         
-        if (userParameters->getOutputOrder() == INPUT)
-        {
-            ++ix;
-            newOutputIndex[iseq - 1] = iseq;
-        }
-        else
-        {
-            newOutputIndex[ix] = iseq;
-            ++ix;
-        }
-    }
-        
-    alnPtr->addOutputIndex(&newOutputIndex);
-    
-    if(userParameters->getDisplayInfo())
-    {
-      int alignmentScore = alnPtr->alignScore(); // ?? check, FS, 2009-05-18
-    }
-    
-    delete alignAlgorithm;
-    delete [] aligned;
-    delete [] maxid;
-    return (_numSeqs);
-}
-
-
-/**
- * 
- * @param alnPtr 
- * @param distMat 
- * @param iStart 
- * @param phylipName 
- * @return 
- */
-int MSA::seqsAlignToProfile(Alignment* alnPtr, DistMatrix* distMat, vector<int>* seqWeight, int iStart, 
-                           string phylipName)
-{
-    int *aligned;  
-    vector<int> treeWeight;
-    vector<int> group;
-    int ix;
-
-    int *maxid;
-    int max = 0;
-    int i = 0, j = 0, iseq = 0;
-    int sum = 0, entries = 0;
-    int score = 0;
-    int _numSeqs = alnPtr->getNumSeqs();
-    
-    utilityObject->info("Start of Multiple Alignment\n"); 
-       
-    ProfileAlignAlgorithm* alignAlgorithm = new MyersMillerProfileAlign;
-    
-    // calculate sequence weights according to branch lengths of the tree -
-     // weights in global variable seq_weight normalised to sum to 100
-    vector<int> newOutputIndex(_numSeqs);    
-    //groupTree.calcSeqWeights(0, _numSeqs, &seqWeight);
-
-    treeWeight.resize(_numSeqs);
-    for (i = 0; i < _numSeqs; i++)
-    {
-        treeWeight[i] = (*seqWeight)[i];
-    }
-
-    // for each sequence, find the most closely related sequence
-
-    maxid = new int[_numSeqs + 1];
-    
-    for (i = 1; i <= _numSeqs; i++)
-    {
-        maxid[i] =  - 1;
-        for (j = 1; j <= _numSeqs; j++)
-        {
-            if (maxid[i] < (*distMat)(i, j))
-            {
-                maxid[i] = static_cast<int>((*distMat)(i, j)); // Mark change 17-5-07
-            }
-        }
-    }
-
-    aligned = new int[_numSeqs + 1];
-    ix = 0;
-    for (i = 1; i <= iStart + 1; i++)
-    {
-        aligned[i] = 1;
-        ++ix;
-        newOutputIndex[i - 1] = i;
-    }
-    for (i = iStart + 2; i <= _numSeqs; i++)
-    {
-        aligned[i] = 0;
-    }
-
-    // for each unaligned sequence, find it's closest pair amongst the
-    // aligned sequences. 
-
-    group.resize(_numSeqs + 1);
-
-    while (ix < _numSeqs)
-    {
-        if (ix > 0)
-        {
-            for (i = 1; i <= _numSeqs; i++)
-            {
-                if (aligned[i] == 0)
-                {
-                    maxid[i] =  - 1;
-                    for (j = 1; j <= _numSeqs; j++)
-                    {
-                        if ((maxid[i] < (*distMat)(i, j)) && (aligned[j] != 0))
-                        {
-                            maxid[i] = static_cast<int>((*distMat)(i, j));
-                        }
-                    }
-                }
-            }
-        }
-
-        // find the most closely related sequence to those already aligned
-
-        max =  -1;
-        for (i = 1; i <= _numSeqs; i++)
-        {
-            if ((aligned[i] == 0) && (maxid[i] > max))
-            {
-                max = maxid[i];
-                iseq = i;
-            }
-        }
-
-        // align this sequence to the existing alignment 
-        entries = 0;
-        for (j = 1; j <= _numSeqs; j++)
-        {
-            if (aligned[j] != 0)
-            {
-                group[j] = 1;
-                entries++;
-            }
-            else if (iseq == j)
-            {
-                group[j] = 2;
-                entries++;
-            }
-        }
-
-        aligned[iseq] = 1;
-
-        // multiply sequence weights from tree by percent
-        // identity with new sequence 
-
-        for (j = 0; j < _numSeqs; j++)
-        {
-            (*seqWeight)[j] = static_cast<int>(treeWeight[j] * (*distMat)(j + 1, iseq));
-        }
-
-        //
-        // Normalise the weights, such that the sum of the weights = INT_SCALE_FACTOR
-        //
-
-        sum = 0;
-        for (j = 0; j < _numSeqs; j++)
-        {
-            if (group[j + 1] == 1)
-            {
-                sum += (*seqWeight)[j];
-            }
-        }
-
-        if (sum == 0)
-        {
-            for (j = 0; j < _numSeqs; j++)
-            {
-                (*seqWeight)[j] = 1;
-            }
-
-            sum = j;
-        }
-        for (j = 0; j < _numSeqs; j++)
-        {
-            (*seqWeight)[j] = ((*seqWeight)[j] * INT_SCALE_FACTOR) / sum;
-            if ((*seqWeight)[j] < 1)
-            {
-                (*seqWeight)[j] = 1;
-            }
-        }
-        // Add the seqWeights to the Alignment object!!!!
-        alnPtr->addSeqWeight(seqWeight);
-        
-        score = alignAlgorithm->profileAlign(alnPtr, distMat, &group, aligned);
-        utilityObject->info("Sequence:%d     Score:%d", iseq, score);
-        if (userParameters->getOutputOrder() == INPUT)
-        {
-            ++ix;
-            newOutputIndex[iseq - 1] = iseq;
-        }
-        else
-        {
-            newOutputIndex[ix] = iseq;
-            ++ix;
-        }
-    }
-
-    delete [] aligned;
-    delete [] maxid;
-    delete alignAlgorithm;
-    alnPtr->addOutputIndex(&newOutputIndex);
-    
-    if(userParameters->getDisplayInfo())
-    {
-        alnPtr->alignScore();
-    }
-
-    return (_numSeqs);
-}
-
-
-/**
- * 
- * @param alnPtr 
- * @param distMat 
- * @return 
- */
-int MSA::calcPairwiseForProfileAlign(Alignment* alnPtr, DistMatrix* distMat)
-{
-    //Tree groupTree;
-    int i, j, temp;
-    int entries;
-    int* aligned;  
-    vector<int> group;
-    vector<int> seqWeight;
-    float dscore;
-    int score;
-    int _numSeqs = alnPtr->getNumSeqs();
-    
-    seqWeight.resize(_numSeqs);
-    ProfileAlignAlgorithm* alignAlg = new MyersMillerProfileAlign;
-    
-    utilityObject->info("Start of Initial Alignment");
-    /* calculate sequence weights according to branch lengths of the tree -
-     * weights in global variable seq_weight normalised to sum to INT_SCALE_FACTOR */
-
-    temp = INT_SCALE_FACTOR / _numSeqs;
-    for (i = 0; i < _numSeqs; i++)
-    {
-        seqWeight[i] = temp;
-    }
-
-    userParameters->setDistanceTree(false);
-
-    /* do the initial alignment.........  */
-
-    group.resize(_numSeqs + 1);
-
-    for (i = 1; i <= alnPtr->getProfile1NumSeqs(); ++i)
-    {
-        group[i] = 1;
-    }
-
-    for (i = alnPtr->getProfile1NumSeqs() + 1; i <= _numSeqs; ++i)
-    {
-        group[i] = 2;
-    }
-
-    entries = _numSeqs;
-
-    aligned = new int[_numSeqs + 1];
-
-    for (i = 1; i <= _numSeqs; i++)
-    {
-        aligned[i] = 1;
-    }
-
-    alnPtr->addSeqWeight(&seqWeight);
-    
-    score = alignAlg->profileAlign(alnPtr, distMat, &group, aligned);
-    utilityObject->info("Sequences:%d      Score:%d", entries, score);
-    delete [] aligned;
-    
-    for (i = 1; i <= _numSeqs; i++)
-    {
-        for (j = i + 1; j <= _numSeqs; j++)
-        {
-            dscore = alnPtr->countid(i, j);
-            (*distMat)(i, j) = ((double)100.0 - (double)dscore) / (double)100.0;
-            (*distMat)(j, i) = (*distMat)(i, j);
-        }
-    }
-    delete alignAlg;
-    return (_numSeqs);
-
-}
-
-
-/**
- * 
- * @param alnPtr 
- * @param distMat 
- * @param p1TreeName 
- * @param p2TreeName 
- * @return 
- */
-int MSA::doProfileAlign(Alignment* alnPtr, DistMatrix* distMat, vector<int>* prof1Weight, vector<int>* prof2Weight)
-{
-    //Tree groupTree1, groupTree2;
-    int i, j, sum, entries;
-    int score;
-    int *aligned;
-    vector<int> group;
-    int *maxid;
-    //vector<int> prof1Weight, prof2Weight;
-    int _profile1NumSeqs = alnPtr->getProfile1NumSeqs();
-    int _numSeqs = alnPtr->getNumSeqs();
-    vector<int> _seqWeight, _outputIndex;    
-    
-    utilityObject->info("Start of Multiple Alignment\n");
-        
-    _seqWeight.resize(_numSeqs + 1);
-    _outputIndex.resize(_numSeqs);
-    ProfileAlignAlgorithm* alignAlgorithm = new MyersMillerProfileAlign;
-
-    // weight sequences with max percent identity with other profile
-
-    maxid = new int[_numSeqs + 1];
-    for (i = 0; i < _profile1NumSeqs; i++)
-    {
-        maxid[i] = 0;
-        for (j = _profile1NumSeqs + 1; j <= _numSeqs; j++)
-        {
-            if (maxid[i] < (*distMat)(i + 1, j))
-            {
-                maxid[i] = static_cast<int>((*distMat)(i + 1, j)); // Mark change 17-5-07
-            } 
-        }
-        _seqWeight[i] = maxid[i] * (*prof1Weight)[i];
-    }
-
-    for (i = _profile1NumSeqs; i < _numSeqs; i++)
-    {
-        maxid[i] =  - 1;
-        for (j = 1; j <= _profile1NumSeqs; j++)
-        {
-            if (maxid[i] < (*distMat)(i + 1, j))
-            {
-                maxid[i] = static_cast<int>((*distMat)(i + 1, j));// Mark change 17-5-07
-            }
-        }
-        _seqWeight[i] = maxid[i] * (*prof2Weight)[i];
-    }
-    //
-    // Normalise the weights, such that the sum of the weights = INT_SCALE_FACTOR
-    //
-
-    sum = 0;
-    for (j = 0; j < _numSeqs; j++)
-    {
-        sum += _seqWeight[j];
-    }
-
-    if (sum == 0)
-    {
-        for (j = 0; j < _numSeqs; j++)
-        {
-            _seqWeight[j] = 1;
-        }
-
-        sum = j;
-    }
-    for (j = 0; j < _numSeqs; j++)
-    {
-        _seqWeight[j] = (_seqWeight[j] * INT_SCALE_FACTOR) / sum;
-        if (_seqWeight[j] < 1)
-        {
-            _seqWeight[j] = 1;
-        }
-    }
-
-    // do the alignment.........  /
-
-    utilityObject->info("Aligning...");
-    group.resize(_numSeqs + 1);
-
-    for (i = 1; i <= _profile1NumSeqs; ++i)
-    {
-        group[i] = 1;
-    }
-
-    for (i = _profile1NumSeqs + 1; i <= _numSeqs; ++i)
-    {
-        group[i] = 2;
-    }
-
-    entries = _numSeqs;
-
-    aligned = new int[_numSeqs + 1];
-    for (i = 1; i <= _numSeqs; i++)
-    {
-        aligned[i] = 1;
-    }
-
-    alnPtr->addSeqWeight(&_seqWeight);
-
-    score = alignAlgorithm->profileAlign(alnPtr, distMat, &group, aligned);
-   
-    utilityObject->info("Sequences:%d      Score:%d", entries, score);
-    
-    for (i = 1; i <= _numSeqs; i++)
-    {
-        _outputIndex[i - 1] = i;
-    }
-
-    alnPtr->addOutputIndex(&_outputIndex);
-    
-    delete alignAlgorithm;
-    delete [] aligned;
-    delete [] maxid;
-    return (_numSeqs);
-    return 1;
-}
-}