Delete unneeded directory
[jabaws.git] / website / archive / binaries / mac / src / muscle / msa2.cpp
diff --git a/website/archive/binaries/mac/src/muscle/msa2.cpp b/website/archive/binaries/mac/src/muscle/msa2.cpp
deleted file mode 100644 (file)
index c82b9d7..0000000
+++ /dev/null
@@ -1,531 +0,0 @@
-#include "muscle.h"\r
-#include "msa.h"\r
-#include "seqvect.h"\r
-#include "profile.h"\r
-#include "tree.h"\r
-\r
-// These global variables are a hack to allow the tree\r
-// dependent iteration code to communicate the edge\r
-// used to divide the tree. The three-way weighting\r
-// scheme needs to know this edge in order to compute\r
-// sequence weights.\r
-static const Tree *g_ptrMuscleTree = 0;\r
-unsigned g_uTreeSplitNode1 = NULL_NEIGHBOR;\r
-unsigned g_uTreeSplitNode2 = NULL_NEIGHBOR;\r
-\r
-void MSA::GetFractionalWeightedCounts(unsigned uColIndex, bool bNormalize,\r
-  FCOUNT fcCounts[], FCOUNT *ptrfcGapStart, FCOUNT *ptrfcGapEnd,\r
-  FCOUNT *ptrfcGapExtend, FCOUNT *ptrfOcc,\r
-  FCOUNT *ptrfcLL, FCOUNT *ptrfcLG, FCOUNT *ptrfcGL, FCOUNT *ptrfcGG) const\r
-       {\r
-       const unsigned uSeqCount = GetSeqCount();\r
-       const unsigned uColCount = GetColCount();\r
-\r
-       memset(fcCounts, 0, g_AlphaSize*sizeof(FCOUNT));\r
-       WEIGHT wTotal = 0;\r
-       FCOUNT fGap = 0;\r
-       for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
-               {\r
-               const WEIGHT w = GetSeqWeight(uSeqIndex);\r
-               if (IsGap(uSeqIndex, uColIndex))\r
-                       {\r
-                       fGap += w;\r
-                       continue;\r
-                       }\r
-               else if (IsWildcard(uSeqIndex, uColIndex))\r
-                       {\r
-                       const unsigned uLetter = GetLetterEx(uSeqIndex, uColIndex);\r
-                       switch (g_Alpha)\r
-                               {\r
-                       case ALPHA_Amino:\r
-                               switch (uLetter)\r
-                                       {\r
-                               case AX_B:              // D or N\r
-                                       fcCounts[AX_D] += w/2;\r
-                                       fcCounts[AX_N] += w/2;\r
-                                       break;\r
-                               case AX_Z:              // E or Q\r
-                                       fcCounts[AX_E] += w/2;\r
-                                       fcCounts[AX_Q] += w/2;\r
-                                       break;\r
-                               default:                // any\r
-                                       {\r
-                                       const FCOUNT f = w/20;\r
-                                       for (unsigned uLetter = 0; uLetter < 20; ++uLetter)\r
-                                               fcCounts[uLetter] += f;\r
-                                       break;\r
-                                       }\r
-                                       }\r
-                               break;\r
-\r
-                       case ALPHA_DNA:\r
-                       case ALPHA_RNA:\r
-                               switch (uLetter)\r
-                                       {\r
-                               case AX_R:      // G or A\r
-                                       fcCounts[NX_G] += w/2;\r
-                                       fcCounts[NX_A] += w/2;\r
-                                       break;\r
-                               case AX_Y:      // C or T/U\r
-                                       fcCounts[NX_C] += w/2;\r
-                                       fcCounts[NX_T] += w/2;\r
-                                       break;\r
-                               default:        // any\r
-                                       const FCOUNT f = w/20;\r
-                                       for (unsigned uLetter = 0; uLetter < 4; ++uLetter)\r
-                                               fcCounts[uLetter] += f;\r
-                                       break;\r
-                                       }\r
-                               break;\r
-\r
-                       default:\r
-                               Quit("Alphabet %d not supported", g_Alpha);\r
-                               }\r
-                       continue;\r
-                       }\r
-               unsigned uLetter = GetLetter(uSeqIndex, uColIndex);\r
-               fcCounts[uLetter] += w;\r
-               wTotal += w;\r
-               }\r
-       *ptrfOcc = (float) (1.0 - fGap);\r
-\r
-       if (bNormalize && wTotal > 0)\r
-               {\r
-               if (wTotal > 1.001)\r
-                       Quit("wTotal=%g\n", wTotal);\r
-               for (unsigned uLetter = 0; uLetter < g_AlphaSize; ++uLetter)\r
-                       fcCounts[uLetter] /= wTotal;\r
-//             AssertNormalized(fcCounts);\r
-               }\r
-\r
-       FCOUNT fcStartCount = 0;\r
-       if (uColIndex == 0)\r
-               {\r
-               for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
-                       if (IsGap(uSeqIndex, uColIndex))\r
-                               fcStartCount += GetSeqWeight(uSeqIndex);\r
-               }\r
-       else\r
-               {\r
-               for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
-                       if (IsGap(uSeqIndex, uColIndex) && !IsGap(uSeqIndex, uColIndex - 1))\r
-                               fcStartCount += GetSeqWeight(uSeqIndex);\r
-               }\r
-\r
-       FCOUNT fcEndCount = 0;\r
-       if (uColCount - 1 == uColIndex)\r
-               {\r
-               for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
-                       if (IsGap(uSeqIndex, uColIndex))\r
-                               fcEndCount += GetSeqWeight(uSeqIndex);\r
-               }\r
-       else\r
-               {\r
-               for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
-                       if (IsGap(uSeqIndex, uColIndex) && !IsGap(uSeqIndex, uColIndex + 1))\r
-                               fcEndCount += GetSeqWeight(uSeqIndex);\r
-               }\r
-\r
-       FCOUNT LL = 0;\r
-       FCOUNT LG = 0;\r
-       FCOUNT GL = 0;\r
-       FCOUNT GG = 0;\r
-       for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
-               {\r
-               WEIGHT w = GetSeqWeight(uSeqIndex);\r
-               bool bLetterHere = !IsGap(uSeqIndex, uColIndex);\r
-               bool bLetterPrev = (uColIndex == 0 || !IsGap(uSeqIndex, uColIndex - 1));\r
-               if (bLetterHere)\r
-                       {\r
-                       if (bLetterPrev)\r
-                               LL += w;\r
-                       else\r
-                               GL += w;\r
-                       }\r
-               else\r
-                       {\r
-                       if (bLetterPrev)\r
-                               LG += w;\r
-                       else\r
-                               GG += w;\r
-                       }\r
-               }\r
-\r
-       FCOUNT fcExtendCount = 0;\r
-       if (uColIndex > 0 && uColIndex < GetColCount() - 1)\r
-               for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
-                       if (IsGap(uSeqIndex, uColIndex) && IsGap(uSeqIndex, uColIndex - 1) &&\r
-                         IsGap(uSeqIndex, uColIndex + 1))\r
-                               fcExtendCount += GetSeqWeight(uSeqIndex);\r
-\r
-       *ptrfcLL = LL;\r
-       *ptrfcLG = LG;\r
-       *ptrfcGL = GL;\r
-       *ptrfcGG = GG;\r
-       *ptrfcGapStart = fcStartCount;\r
-       *ptrfcGapEnd = fcEndCount;\r
-       *ptrfcGapExtend = fcExtendCount;\r
-       }\r
-\r
-// Return true if the given column has no gaps and all\r
-// its residues are in the same biochemical group.\r
-bool MSAColIsConservative(const MSA &msa, unsigned uColIndex)\r
-       {\r
-       extern unsigned ResidueGroup[];\r
-\r
-       const unsigned uSeqCount = msa.GetColCount();\r
-       if (0 == uSeqCount)\r
-               Quit("MSAColIsConservative: empty alignment");\r
-\r
-       if (msa.IsGap(0, uColIndex))\r
-               return false;\r
-\r
-       unsigned uLetter = msa.GetLetterEx(0, uColIndex);\r
-       const unsigned uGroup = ResidueGroup[uLetter];\r
-\r
-       for (unsigned uSeqIndex = 1; uSeqIndex < uSeqCount; ++uSeqIndex)\r
-               {\r
-               if (msa.IsGap(uSeqIndex, uColIndex))\r
-                       return false;\r
-               uLetter = msa.GetLetter(uSeqIndex, uColIndex);\r
-               if (ResidueGroup[uLetter] != uGroup)\r
-                       return false;\r
-               }\r
-       return true;\r
-       }\r
-\r
-void MSAFromSeqRange(const MSA &msaIn, unsigned uFromSeqIndex, unsigned uSeqCount,\r
-  MSA &msaOut)\r
-       {\r
-       const unsigned uColCount = msaIn.GetColCount();\r
-       msaOut.SetSize(uSeqCount, uColCount);\r
-\r
-       for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
-               {\r
-               const char *ptrName = msaIn.GetSeqName(uFromSeqIndex + uSeqIndex);\r
-               msaOut.SetSeqName(uSeqIndex, ptrName);\r
-\r
-               for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)\r
-                       {\r
-                       const char c = msaIn.GetChar(uFromSeqIndex + uSeqIndex, uColIndex);\r
-                       msaOut.SetChar(uSeqIndex, uColIndex, c);\r
-                       }\r
-               }\r
-       }\r
-\r
-void MSAFromColRange(const MSA &msaIn, unsigned uFromColIndex, unsigned uColCount,\r
-  MSA &msaOut)\r
-       {\r
-       const unsigned uSeqCount = msaIn.GetSeqCount();\r
-       const unsigned uInColCount = msaIn.GetColCount();\r
-\r
-       if (uFromColIndex + uColCount - 1 > uInColCount)\r
-               Quit("MSAFromColRange, out of bounds");\r
-\r
-       msaOut.SetSize(uSeqCount, uColCount);\r
-\r
-       for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
-               {\r
-               const char *ptrName = msaIn.GetSeqName(uSeqIndex);\r
-               unsigned uId = msaIn.GetSeqId(uSeqIndex);\r
-               msaOut.SetSeqName(uSeqIndex, ptrName);\r
-               msaOut.SetSeqId(uSeqIndex, uId);\r
-\r
-               for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)\r
-                       {\r
-                       const char c = msaIn.GetChar(uSeqIndex, uFromColIndex + uColIndex);\r
-                       msaOut.SetChar(uSeqIndex, uColIndex, c);\r
-                       }\r
-               }\r
-       }\r
-\r
-void SeqVectFromMSA(const MSA &msa, SeqVect &v)\r
-       {\r
-       v.Clear();\r
-       const unsigned uSeqCount = msa.GetSeqCount();\r
-       for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
-               {\r
-               Seq s;\r
-               msa.GetSeq(uSeqIndex, s);\r
-\r
-               s.StripGaps();\r
-               //if (0 == s.Length())\r
-               //      continue;\r
-\r
-               const char *ptrName = msa.GetSeqName(uSeqIndex);\r
-               s.SetName(ptrName);\r
-\r
-               v.AppendSeq(s);\r
-               }\r
-       }\r
-\r
-void DeleteGappedCols(MSA &msa)\r
-       {\r
-       unsigned uColIndex = 0;\r
-       for (;;)\r
-               {\r
-               if (uColIndex >= msa.GetColCount())\r
-                       break;\r
-               if (msa.IsGapColumn(uColIndex))\r
-                       msa.DeleteCol(uColIndex);\r
-               else\r
-                       ++uColIndex;\r
-               }\r
-       }\r
-\r
-void MSAFromSeqSubset(const MSA &msaIn, const unsigned uSeqIndexes[], unsigned uSeqCount,\r
-  MSA &msaOut)\r
-       {\r
-       const unsigned uColCount = msaIn.GetColCount();\r
-       msaOut.SetSize(uSeqCount, uColCount);\r
-       for (unsigned uSeqIndexOut = 0; uSeqIndexOut < uSeqCount; ++uSeqIndexOut)\r
-               {\r
-               unsigned uSeqIndexIn = uSeqIndexes[uSeqIndexOut];\r
-               const char *ptrName = msaIn.GetSeqName(uSeqIndexIn);\r
-               unsigned uId = msaIn.GetSeqId(uSeqIndexIn);\r
-               msaOut.SetSeqName(uSeqIndexOut, ptrName);\r
-               msaOut.SetSeqId(uSeqIndexOut, uId);\r
-               for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)\r
-                       {\r
-                       const char c = msaIn.GetChar(uSeqIndexIn, uColIndex);\r
-                       msaOut.SetChar(uSeqIndexOut, uColIndex, c);\r
-                       }\r
-               }\r
-       }\r
-\r
-void AssertMSAEqIgnoreCaseAndGaps(const MSA &msa1, const MSA &msa2)\r
-       {\r
-       const unsigned uSeqCount1 = msa1.GetSeqCount();\r
-       const unsigned uSeqCount2 = msa2.GetSeqCount();\r
-       if (uSeqCount1 != uSeqCount2)\r
-               Quit("Seq count differs");\r
-\r
-       for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount1; ++uSeqIndex)\r
-               {\r
-               Seq seq1;\r
-               msa1.GetSeq(uSeqIndex, seq1);\r
-\r
-               unsigned uId = msa1.GetSeqId(uSeqIndex);\r
-               unsigned uSeqIndex2 = msa2.GetSeqIndex(uId);\r
-\r
-               Seq seq2;\r
-               msa2.GetSeq(uSeqIndex2, seq2);\r
-\r
-               if (!seq1.EqIgnoreCaseAndGaps(seq2))\r
-                       {\r
-                       Log("Input:\n");\r
-                       seq1.LogMe();\r
-                       Log("Output:\n");\r
-                       seq2.LogMe();\r
-                       Quit("Seq %s differ ", msa1.GetSeqName(uSeqIndex));\r
-                       }\r
-               }\r
-       }\r
-\r
-void AssertMSAEq(const MSA &msa1, const MSA &msa2)\r
-       {\r
-       const unsigned uSeqCount1 = msa1.GetSeqCount();\r
-       const unsigned uSeqCount2 = msa2.GetSeqCount();\r
-       if (uSeqCount1 != uSeqCount2)\r
-               Quit("Seq count differs");\r
-\r
-       for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount1; ++uSeqIndex)\r
-               {\r
-               Seq seq1;\r
-               msa1.GetSeq(uSeqIndex, seq1);\r
-\r
-               unsigned uId = msa1.GetSeqId(uSeqIndex);\r
-               unsigned uSeqIndex2 = msa2.GetSeqIndex(uId);\r
-\r
-               Seq seq2;\r
-               msa2.GetSeq(uSeqIndex2, seq2);\r
-\r
-               if (!seq1.Eq(seq2))\r
-                       {\r
-                       Log("Input:\n");\r
-                       seq1.LogMe();\r
-                       Log("Output:\n");\r
-                       seq2.LogMe();\r
-                       Quit("Seq %s differ ", msa1.GetSeqName(uSeqIndex));\r
-                       }\r
-               }\r
-       }\r
-\r
-void SetMSAWeightsMuscle(MSA &msa)\r
-       {\r
-       SEQWEIGHT Method = GetSeqWeightMethod();\r
-       switch (Method)\r
-               {\r
-       case SEQWEIGHT_None:\r
-               msa.SetUniformWeights();\r
-               return;\r
-\r
-       case SEQWEIGHT_Henikoff:\r
-               msa.SetHenikoffWeights();\r
-               return;\r
-\r
-       case SEQWEIGHT_HenikoffPB:\r
-               msa.SetHenikoffWeightsPB();\r
-               return;\r
-\r
-       case SEQWEIGHT_GSC:\r
-               msa.SetGSCWeights();\r
-               return;\r
-\r
-       case SEQWEIGHT_ClustalW:\r
-               SetClustalWWeightsMuscle(msa);\r
-               return;\r
-       \r
-       case SEQWEIGHT_ThreeWay:\r
-               SetThreeWayWeightsMuscle(msa);\r
-               return;\r
-               }\r
-       Quit("SetMSAWeightsMuscle, Invalid method=%d", Method);\r
-       }\r
-\r
-static WEIGHT *g_MuscleWeights;\r
-static unsigned g_uMuscleIdCount;\r
-\r
-WEIGHT GetMuscleSeqWeightById(unsigned uId)\r
-       {\r
-       if (0 == g_MuscleWeights)\r
-               Quit("g_MuscleWeights = 0");\r
-       if (uId >= g_uMuscleIdCount)\r
-               Quit("GetMuscleSeqWeightById(%u): count=%u",\r
-                 uId, g_uMuscleIdCount);\r
-\r
-       return g_MuscleWeights[uId];\r
-       }\r
-\r
-void SetMuscleTree(const Tree &tree)\r
-       {\r
-       g_ptrMuscleTree = &tree;\r
-\r
-       if (SEQWEIGHT_ClustalW != GetSeqWeightMethod())\r
-               return;\r
-\r
-       delete[] g_MuscleWeights;\r
-\r
-       const unsigned uLeafCount = tree.GetLeafCount();\r
-       g_uMuscleIdCount = uLeafCount;\r
-       g_MuscleWeights = new WEIGHT[uLeafCount];\r
-       CalcClustalWWeights(tree, g_MuscleWeights);\r
-       }\r
-\r
-void SetClustalWWeightsMuscle(MSA &msa)\r
-       {\r
-       if (0 == g_MuscleWeights)\r
-               Quit("g_MuscleWeights = 0");\r
-       const unsigned uSeqCount = msa.GetSeqCount();\r
-       for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
-               {\r
-               const unsigned uId = msa.GetSeqId(uSeqIndex);\r
-               if (uId >= g_uMuscleIdCount)\r
-                       Quit("SetClustalWWeightsMuscle: id out of range");\r
-               msa.SetSeqWeight(uSeqIndex, g_MuscleWeights[uId]);\r
-               }\r
-       msa.NormalizeWeights((WEIGHT) 1.0);\r
-       }\r
-\r
-#define        LOCAL_VERBOSE   0\r
-\r
-void SetThreeWayWeightsMuscle(MSA &msa)\r
-       {\r
-       if (NULL_NEIGHBOR == g_uTreeSplitNode1 || NULL_NEIGHBOR == g_uTreeSplitNode2)\r
-               {\r
-               msa.SetHenikoffWeightsPB();\r
-               return;\r
-               }\r
-\r
-       const unsigned uMuscleSeqCount = g_ptrMuscleTree->GetLeafCount();\r
-       WEIGHT *Weights = new WEIGHT[uMuscleSeqCount];\r
-\r
-       CalcThreeWayWeights(*g_ptrMuscleTree, g_uTreeSplitNode1, g_uTreeSplitNode2,\r
-         Weights);\r
-\r
-       const unsigned uMSASeqCount = msa.GetSeqCount();\r
-       for (unsigned uSeqIndex = 0; uSeqIndex < uMSASeqCount; ++uSeqIndex)\r
-               {\r
-               const unsigned uId = msa.GetSeqId(uSeqIndex);\r
-               if (uId >= uMuscleSeqCount)\r
-                       Quit("SetThreeWayWeightsMuscle: id out of range");\r
-               msa.SetSeqWeight(uSeqIndex, Weights[uId]);\r
-               }\r
-#if    LOCAL_VERBOSE\r
-       {\r
-       Log("SetThreeWayWeightsMuscle\n");\r
-       for (unsigned n = 0; n < uMSASeqCount; ++n)\r
-               {\r
-               const unsigned uId = msa.GetSeqId(n);\r
-               Log("%20.20s %6.3f\n", msa.GetSeqName(n), Weights[uId]);\r
-               }\r
-       }\r
-#endif\r
-       msa.NormalizeWeights((WEIGHT) 1.0);\r
-\r
-       delete[] Weights;\r
-       }\r
-\r
-// Append msa2 at the end of msa1\r
-void MSAAppend(MSA &msa1, const MSA &msa2)\r
-       {\r
-       const unsigned uSeqCount = msa1.GetSeqCount();\r
-\r
-       const unsigned uColCount1 = msa1.GetColCount();\r
-       const unsigned uColCount2 = msa2.GetColCount();\r
-       const unsigned uColCountCat = uColCount1 + uColCount2;\r
-\r
-       for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
-               {\r
-               unsigned uId = msa1.GetSeqId(uSeqIndex);\r
-               unsigned uSeqIndex2 = msa2.GetSeqIndex(uId);\r
-               for (unsigned uColIndex = 0; uColIndex < uColCount2; ++uColIndex)\r
-                       {\r
-                       const char c = msa2.GetChar(uSeqIndex2, uColIndex);\r
-                       msa1.SetChar(uSeqIndex, uColCount1 + uColIndex, c);\r
-                       }\r
-               }\r
-       }\r
-\r
-// "Catenate" two MSAs (by bad analogy with UNIX cat command).\r
-// msa1 and msa2 must have same sequence names, but possibly\r
-// in a different order.\r
-// msaCat is the combined alignment produce by appending\r
-// sequences in msa2 to sequences in msa1.\r
-void MSACat(const MSA &msa1, const MSA &msa2, MSA &msaCat)\r
-       {\r
-       const unsigned uSeqCount = msa1.GetSeqCount();\r
-\r
-       const unsigned uColCount1 = msa1.GetColCount();\r
-       const unsigned uColCount2 = msa2.GetColCount();\r
-       const unsigned uColCountCat = uColCount1 + uColCount2;\r
-\r
-       msaCat.SetSize(uSeqCount, uColCountCat);\r
-\r
-       for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
-               {\r
-               for (unsigned uColIndex = 0; uColIndex < uColCount1; ++uColIndex)\r
-                       {\r
-                       const char c = msa1.GetChar(uSeqIndex, uColIndex);\r
-                       msaCat.SetChar(uSeqIndex, uColIndex, c);\r
-                       }\r
-\r
-               const char *ptrSeqName = msa1.GetSeqName(uSeqIndex);\r
-               unsigned uSeqIndex2;\r
-               msaCat.SetSeqName(uSeqIndex, ptrSeqName);\r
-               bool bFound = msa2.GetSeqIndex(ptrSeqName, &uSeqIndex2);\r
-               if (bFound)\r
-                       {\r
-                       for (unsigned uColIndex = 0; uColIndex < uColCount2; ++uColIndex)\r
-                               {\r
-                               const char c = msa2.GetChar(uSeqIndex2, uColIndex);\r
-                               msaCat.SetChar(uSeqIndex, uColCount1 + uColIndex, c);\r
-                               }\r
-                       }\r
-               else\r
-                       {\r
-                       for (unsigned uColIndex = 0; uColIndex < uColCount2; ++uColIndex)\r
-                               msaCat.SetChar(uSeqIndex, uColCount1 + uColIndex, '-');\r
-                       }\r
-               }\r
-       }\r