+++ /dev/null
-#include "muscle.h"\r
-#include "profile.h"\r
-\r
-#define TRACE 0\r
-\r
-enum\r
- {\r
- LL = 0,\r
- LG = 1,\r
- GL = 2,\r
- GG = 3,\r
- };\r
-\r
-static const char *GapTypeToStr(int GapType)\r
- {\r
- switch (GapType)\r
- {\r
- case LL: return "LL";\r
- case LG: return "LG";\r
- case GL: return "GL";\r
- case GG: return "GG";\r
- }\r
- Quit("Invalid gap type");\r
- return "?";\r
- }\r
-\r
-static SCORE GapScoreMatrix[4][4];\r
-\r
-static void InitGapScoreMatrix()\r
- {\r
- const SCORE t = (SCORE) 0.2;\r
-\r
- GapScoreMatrix[LL][LL] = 0;\r
- GapScoreMatrix[LL][LG] = g_scoreGapOpen;\r
- GapScoreMatrix[LL][GL] = 0;\r
- GapScoreMatrix[LL][GG] = 0;\r
-\r
- GapScoreMatrix[LG][LL] = g_scoreGapOpen;\r
- GapScoreMatrix[LG][LG] = 0;\r
- GapScoreMatrix[LG][GL] = g_scoreGapOpen;\r
- GapScoreMatrix[LG][GG] = t*g_scoreGapOpen; // approximation!\r
-\r
- GapScoreMatrix[GL][LL] = 0;\r
- GapScoreMatrix[GL][LG] = g_scoreGapOpen;\r
- GapScoreMatrix[GL][GL] = 0;\r
- GapScoreMatrix[GL][GG] = 0;\r
-\r
- GapScoreMatrix[GG][LL] = 0;\r
- GapScoreMatrix[GG][LG] = t*g_scoreGapOpen; // approximation!\r
- GapScoreMatrix[GG][GL] = 0;\r
- GapScoreMatrix[GG][GG] = 0;\r
-\r
- for (int i = 0; i < 4; ++i)\r
- for (int j = 0; j < i; ++j)\r
- if (GapScoreMatrix[i][j] != GapScoreMatrix[j][i])\r
- Quit("GapScoreMatrix not symmetrical");\r
- }\r
-\r
-static SCORE SPColBrute(const MSA &msa, unsigned uColIndex)\r
- {\r
- SCORE Sum = 0;\r
- const unsigned uSeqCount = msa.GetSeqCount();\r
- for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1)\r
- {\r
- const WEIGHT w1 = msa.GetSeqWeight(uSeqIndex1);\r
- unsigned uLetter1 = msa.GetLetterEx(uSeqIndex1, uColIndex);\r
- if (uLetter1 >= 20)\r
- continue;\r
- for (unsigned uSeqIndex2 = 0; uSeqIndex2 < uSeqIndex1; ++uSeqIndex2)\r
- {\r
- const WEIGHT w2 = msa.GetSeqWeight(uSeqIndex2);\r
- unsigned uLetter2 = msa.GetLetterEx(uSeqIndex2, uColIndex);\r
- if (uLetter2 >= 20)\r
- continue;\r
- SCORE t = w1*w2*(*g_ptrScoreMatrix)[uLetter1][uLetter2];\r
-#if TRACE\r
- Log("Check %c %c w1=%.3g w2=%.3g Mx=%.3g t=%.3g\n",\r
- LetterToCharAmino(uLetter1),\r
- LetterToCharAmino(uLetter2),\r
- w1,\r
- w2,\r
- (*g_ptrScoreMatrix)[uLetter1][uLetter2],\r
- t);\r
-#endif\r
- Sum += t;\r
- }\r
- }\r
- return Sum;\r
- }\r
-\r
-static SCORE SPGapFreqs(const FCOUNT Freqs[])\r
- {\r
-#if TRACE\r
- Log("Freqs=");\r
- for (unsigned i = 0; i < 4; ++i)\r
- if (Freqs[i] != 0)\r
- Log(" %s=%.3g", GapTypeToStr(i), Freqs[i]);\r
- Log("\n");\r
-#endif\r
-\r
- SCORE TotalOffDiag = 0;\r
- SCORE TotalDiag = 0;\r
- for (unsigned i = 0; i < 4; ++i)\r
- {\r
- const FCOUNT fi = Freqs[i];\r
- if (0 == fi)\r
- continue;\r
- const float *Row = GapScoreMatrix[i];\r
- SCORE diagt = fi*fi*Row[i];\r
- TotalDiag += diagt;\r
-#if TRACE\r
- Log("SPFGaps %s %s + Mx=%.3g TotalDiag += %.3g\n",\r
- GapTypeToStr(i),\r
- GapTypeToStr(i),\r
- Row[i],\r
- diagt);\r
-#endif\r
- SCORE Sum = 0;\r
- for (unsigned j = 0; j < i; ++j)\r
- {\r
- SCORE t = Freqs[j]*Row[j];\r
-#if TRACE\r
- if (Freqs[j] != 0)\r
- Log("SPFGaps %s %s + Mx=%.3g Sum += %.3g\n",\r
- GapTypeToStr(i),\r
- GapTypeToStr(j),\r
- Row[j],\r
- fi*t);\r
-#endif\r
- Sum += t;\r
- }\r
- TotalOffDiag += fi*Sum;\r
- }\r
-#if TRACE\r
- Log("SPFGap TotalOffDiag=%.3g + TotalDiag=%.3g = %.3g\n",\r
- TotalOffDiag, TotalDiag, TotalOffDiag + TotalDiag);\r
-#endif\r
- return TotalOffDiag*2 + TotalDiag;\r
- }\r
-\r
-static SCORE SPFreqs(const FCOUNT Freqs[])\r
- {\r
-#if TRACE\r
- Log("Freqs=");\r
- for (unsigned i = 0; i < 20; ++i)\r
- if (Freqs[i] != 0)\r
- Log(" %c=%.3g", LetterToCharAmino(i), Freqs[i]);\r
- Log("\n");\r
-#endif\r
-\r
- SCORE TotalOffDiag = 0;\r
- SCORE TotalDiag = 0;\r
- for (unsigned i = 0; i < 20; ++i)\r
- {\r
- const FCOUNT fi = Freqs[i];\r
- if (0 == fi)\r
- continue;\r
- const float *Row = (*g_ptrScoreMatrix)[i];\r
- SCORE diagt = fi*fi*Row[i];\r
- TotalDiag += diagt;\r
-#if TRACE\r
- Log("SPF %c %c + Mx=%.3g TotalDiag += %.3g\n",\r
- LetterToCharAmino(i),\r
- LetterToCharAmino(i),\r
- Row[i],\r
- diagt);\r
-#endif\r
- SCORE Sum = 0;\r
- for (unsigned j = 0; j < i; ++j)\r
- {\r
- SCORE t = Freqs[j]*Row[j];\r
-#if TRACE\r
- if (Freqs[j] != 0)\r
- Log("SPF %c %c + Mx=%.3g Sum += %.3g\n",\r
- LetterToCharAmino(i),\r
- LetterToCharAmino(j),\r
- Row[j],\r
- fi*t);\r
-#endif\r
- Sum += t;\r
- }\r
- TotalOffDiag += fi*Sum;\r
- }\r
-#if TRACE\r
- Log("SPF TotalOffDiag=%.3g + TotalDiag=%.3g = %.3g\n",\r
- TotalOffDiag, TotalDiag, TotalOffDiag + TotalDiag);\r
-#endif\r
- return TotalOffDiag*2 + TotalDiag;\r
- }\r
-\r
-static SCORE ObjScoreSPCol(const MSA &msa, unsigned uColIndex)\r
- {\r
- FCOUNT Freqs[20];\r
- FCOUNT GapFreqs[4];\r
-\r
- memset(Freqs, 0, sizeof(Freqs));\r
- memset(GapFreqs, 0, sizeof(GapFreqs));\r
-\r
- const unsigned uSeqCount = msa.GetSeqCount();\r
-#if TRACE\r
- Log("Weights=");\r
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
- Log(" %u=%.3g", uSeqIndex, msa.GetSeqWeight(uSeqIndex));\r
- Log("\n");\r
-#endif\r
- SCORE SelfOverCount = 0;\r
- SCORE GapSelfOverCount = 0;\r
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
- {\r
- WEIGHT w = msa.GetSeqWeight(uSeqIndex);\r
-\r
- bool bGapThisCol = msa.IsGap(uSeqIndex, uColIndex);\r
- bool bGapPrevCol = (uColIndex == 0 ? false : msa.IsGap(uSeqIndex, uColIndex - 1));\r
- int GapType = bGapThisCol + 2*bGapPrevCol;\r
- assert(GapType >= 0 && GapType < 4);\r
- GapFreqs[GapType] += w;\r
- SCORE gapt = w*w*GapScoreMatrix[GapType][GapType];\r
- GapSelfOverCount += gapt;\r
-\r
- if (bGapThisCol)\r
- continue;\r
- unsigned uLetter = msa.GetLetterEx(uSeqIndex, uColIndex);\r
- if (uLetter >= 20)\r
- continue;\r
- Freqs[uLetter] += w;\r
- SCORE t = w*w*(*g_ptrScoreMatrix)[uLetter][uLetter];\r
-#if TRACE\r
- Log("FastCol compute freqs & SelfOverCount %c w=%.3g M=%.3g SelfOverCount += %.3g\n",\r
- LetterToCharAmino(uLetter), w, (*g_ptrScoreMatrix)[uLetter][uLetter], t);\r
-#endif\r
- SelfOverCount += t;\r
- }\r
- SCORE SPF = SPFreqs(Freqs);\r
- SCORE Col = SPF - SelfOverCount;\r
-\r
- SCORE SPFGaps = SPGapFreqs(GapFreqs);\r
- SCORE ColGaps = SPFGaps - GapSelfOverCount;\r
-#if TRACE\r
- Log("SPF=%.3g - SelfOverCount=%.3g = %.3g\n", SPF, SelfOverCount, Col);\r
- Log("SPFGaps=%.3g - GapsSelfOverCount=%.3g = %.3g\n", SPFGaps, GapSelfOverCount, ColGaps);\r
-#endif\r
- return Col + ColGaps;\r
- }\r
-\r
-SCORE ObjScoreSPDimer(const MSA &msa)\r
- {\r
- static bool bGapScoreMatrixInit = false;\r
- if (!bGapScoreMatrixInit)\r
- InitGapScoreMatrix();\r
-\r
- SCORE Total = 0;\r
- const unsigned uSeqCount = msa.GetSeqCount();\r
- const unsigned uColCount = msa.GetColCount();\r
- for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)\r
- {\r
- SCORE Col = ObjScoreSPCol(msa, uColIndex);\r
-#if TRACE\r
- {\r
- SCORE ColCheck = SPColBrute(msa, uColIndex);\r
- Log("FastCol=%.3g CheckCol=%.3g\n", Col, ColCheck);\r
- }\r
-#endif\r
- Total += Col;\r
- }\r
-#if TRACE\r
- Log("Total/2 = %.3g (final result from fast)\n", Total/2);\r
-#endif\r
- return Total/2;\r
- }\r