--- /dev/null
+#include "muscle.h"\r
+#include "profile.h"\r
+\r
+#define TRACE 0\r
+\r
+enum\r
+ {\r
+ LL = 0,\r
+ LG = 1,\r
+ GL = 2,\r
+ GG = 3,\r
+ };\r
+\r
+static const char *GapTypeToStr(int GapType)\r
+ {\r
+ switch (GapType)\r
+ {\r
+ case LL: return "LL";\r
+ case LG: return "LG";\r
+ case GL: return "GL";\r
+ case GG: return "GG";\r
+ }\r
+ Quit("Invalid gap type");\r
+ return "?";\r
+ }\r
+\r
+static SCORE GapScoreMatrix[4][4];\r
+\r
+static void InitGapScoreMatrix()\r
+ {\r
+ const SCORE t = (SCORE) 0.2;\r
+\r
+ GapScoreMatrix[LL][LL] = 0;\r
+ GapScoreMatrix[LL][LG] = g_scoreGapOpen;\r
+ GapScoreMatrix[LL][GL] = 0;\r
+ GapScoreMatrix[LL][GG] = 0;\r
+\r
+ GapScoreMatrix[LG][LL] = g_scoreGapOpen;\r
+ GapScoreMatrix[LG][LG] = 0;\r
+ GapScoreMatrix[LG][GL] = g_scoreGapOpen;\r
+ GapScoreMatrix[LG][GG] = t*g_scoreGapOpen; // approximation!\r
+\r
+ GapScoreMatrix[GL][LL] = 0;\r
+ GapScoreMatrix[GL][LG] = g_scoreGapOpen;\r
+ GapScoreMatrix[GL][GL] = 0;\r
+ GapScoreMatrix[GL][GG] = 0;\r
+\r
+ GapScoreMatrix[GG][LL] = 0;\r
+ GapScoreMatrix[GG][LG] = t*g_scoreGapOpen; // approximation!\r
+ GapScoreMatrix[GG][GL] = 0;\r
+ GapScoreMatrix[GG][GG] = 0;\r
+\r
+ for (int i = 0; i < 4; ++i)\r
+ for (int j = 0; j < i; ++j)\r
+ if (GapScoreMatrix[i][j] != GapScoreMatrix[j][i])\r
+ Quit("GapScoreMatrix not symmetrical");\r
+ }\r
+\r
+static SCORE SPColBrute(const MSA &msa, unsigned uColIndex)\r
+ {\r
+ SCORE Sum = 0;\r
+ const unsigned uSeqCount = msa.GetSeqCount();\r
+ for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1)\r
+ {\r
+ const WEIGHT w1 = msa.GetSeqWeight(uSeqIndex1);\r
+ unsigned uLetter1 = msa.GetLetterEx(uSeqIndex1, uColIndex);\r
+ if (uLetter1 >= 20)\r
+ continue;\r
+ for (unsigned uSeqIndex2 = 0; uSeqIndex2 < uSeqIndex1; ++uSeqIndex2)\r
+ {\r
+ const WEIGHT w2 = msa.GetSeqWeight(uSeqIndex2);\r
+ unsigned uLetter2 = msa.GetLetterEx(uSeqIndex2, uColIndex);\r
+ if (uLetter2 >= 20)\r
+ continue;\r
+ SCORE t = w1*w2*(*g_ptrScoreMatrix)[uLetter1][uLetter2];\r
+#if TRACE\r
+ Log("Check %c %c w1=%.3g w2=%.3g Mx=%.3g t=%.3g\n",\r
+ LetterToCharAmino(uLetter1),\r
+ LetterToCharAmino(uLetter2),\r
+ w1,\r
+ w2,\r
+ (*g_ptrScoreMatrix)[uLetter1][uLetter2],\r
+ t);\r
+#endif\r
+ Sum += t;\r
+ }\r
+ }\r
+ return Sum;\r
+ }\r
+\r
+static SCORE SPGapFreqs(const FCOUNT Freqs[])\r
+ {\r
+#if TRACE\r
+ Log("Freqs=");\r
+ for (unsigned i = 0; i < 4; ++i)\r
+ if (Freqs[i] != 0)\r
+ Log(" %s=%.3g", GapTypeToStr(i), Freqs[i]);\r
+ Log("\n");\r
+#endif\r
+\r
+ SCORE TotalOffDiag = 0;\r
+ SCORE TotalDiag = 0;\r
+ for (unsigned i = 0; i < 4; ++i)\r
+ {\r
+ const FCOUNT fi = Freqs[i];\r
+ if (0 == fi)\r
+ continue;\r
+ const float *Row = GapScoreMatrix[i];\r
+ SCORE diagt = fi*fi*Row[i];\r
+ TotalDiag += diagt;\r
+#if TRACE\r
+ Log("SPFGaps %s %s + Mx=%.3g TotalDiag += %.3g\n",\r
+ GapTypeToStr(i),\r
+ GapTypeToStr(i),\r
+ Row[i],\r
+ diagt);\r
+#endif\r
+ SCORE Sum = 0;\r
+ for (unsigned j = 0; j < i; ++j)\r
+ {\r
+ SCORE t = Freqs[j]*Row[j];\r
+#if TRACE\r
+ if (Freqs[j] != 0)\r
+ Log("SPFGaps %s %s + Mx=%.3g Sum += %.3g\n",\r
+ GapTypeToStr(i),\r
+ GapTypeToStr(j),\r
+ Row[j],\r
+ fi*t);\r
+#endif\r
+ Sum += t;\r
+ }\r
+ TotalOffDiag += fi*Sum;\r
+ }\r
+#if TRACE\r
+ Log("SPFGap TotalOffDiag=%.3g + TotalDiag=%.3g = %.3g\n",\r
+ TotalOffDiag, TotalDiag, TotalOffDiag + TotalDiag);\r
+#endif\r
+ return TotalOffDiag*2 + TotalDiag;\r
+ }\r
+\r
+static SCORE SPFreqs(const FCOUNT Freqs[])\r
+ {\r
+#if TRACE\r
+ Log("Freqs=");\r
+ for (unsigned i = 0; i < 20; ++i)\r
+ if (Freqs[i] != 0)\r
+ Log(" %c=%.3g", LetterToCharAmino(i), Freqs[i]);\r
+ Log("\n");\r
+#endif\r
+\r
+ SCORE TotalOffDiag = 0;\r
+ SCORE TotalDiag = 0;\r
+ for (unsigned i = 0; i < 20; ++i)\r
+ {\r
+ const FCOUNT fi = Freqs[i];\r
+ if (0 == fi)\r
+ continue;\r
+ const float *Row = (*g_ptrScoreMatrix)[i];\r
+ SCORE diagt = fi*fi*Row[i];\r
+ TotalDiag += diagt;\r
+#if TRACE\r
+ Log("SPF %c %c + Mx=%.3g TotalDiag += %.3g\n",\r
+ LetterToCharAmino(i),\r
+ LetterToCharAmino(i),\r
+ Row[i],\r
+ diagt);\r
+#endif\r
+ SCORE Sum = 0;\r
+ for (unsigned j = 0; j < i; ++j)\r
+ {\r
+ SCORE t = Freqs[j]*Row[j];\r
+#if TRACE\r
+ if (Freqs[j] != 0)\r
+ Log("SPF %c %c + Mx=%.3g Sum += %.3g\n",\r
+ LetterToCharAmino(i),\r
+ LetterToCharAmino(j),\r
+ Row[j],\r
+ fi*t);\r
+#endif\r
+ Sum += t;\r
+ }\r
+ TotalOffDiag += fi*Sum;\r
+ }\r
+#if TRACE\r
+ Log("SPF TotalOffDiag=%.3g + TotalDiag=%.3g = %.3g\n",\r
+ TotalOffDiag, TotalDiag, TotalOffDiag + TotalDiag);\r
+#endif\r
+ return TotalOffDiag*2 + TotalDiag;\r
+ }\r
+\r
+static SCORE ObjScoreSPCol(const MSA &msa, unsigned uColIndex)\r
+ {\r
+ FCOUNT Freqs[20];\r
+ FCOUNT GapFreqs[4];\r
+\r
+ memset(Freqs, 0, sizeof(Freqs));\r
+ memset(GapFreqs, 0, sizeof(GapFreqs));\r
+\r
+ const unsigned uSeqCount = msa.GetSeqCount();\r
+#if TRACE\r
+ Log("Weights=");\r
+ for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
+ Log(" %u=%.3g", uSeqIndex, msa.GetSeqWeight(uSeqIndex));\r
+ Log("\n");\r
+#endif\r
+ SCORE SelfOverCount = 0;\r
+ SCORE GapSelfOverCount = 0;\r
+ for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
+ {\r
+ WEIGHT w = msa.GetSeqWeight(uSeqIndex);\r
+\r
+ bool bGapThisCol = msa.IsGap(uSeqIndex, uColIndex);\r
+ bool bGapPrevCol = (uColIndex == 0 ? false : msa.IsGap(uSeqIndex, uColIndex - 1));\r
+ int GapType = bGapThisCol + 2*bGapPrevCol;\r
+ assert(GapType >= 0 && GapType < 4);\r
+ GapFreqs[GapType] += w;\r
+ SCORE gapt = w*w*GapScoreMatrix[GapType][GapType];\r
+ GapSelfOverCount += gapt;\r
+\r
+ if (bGapThisCol)\r
+ continue;\r
+ unsigned uLetter = msa.GetLetterEx(uSeqIndex, uColIndex);\r
+ if (uLetter >= 20)\r
+ continue;\r
+ Freqs[uLetter] += w;\r
+ SCORE t = w*w*(*g_ptrScoreMatrix)[uLetter][uLetter];\r
+#if TRACE\r
+ Log("FastCol compute freqs & SelfOverCount %c w=%.3g M=%.3g SelfOverCount += %.3g\n",\r
+ LetterToCharAmino(uLetter), w, (*g_ptrScoreMatrix)[uLetter][uLetter], t);\r
+#endif\r
+ SelfOverCount += t;\r
+ }\r
+ SCORE SPF = SPFreqs(Freqs);\r
+ SCORE Col = SPF - SelfOverCount;\r
+\r
+ SCORE SPFGaps = SPGapFreqs(GapFreqs);\r
+ SCORE ColGaps = SPFGaps - GapSelfOverCount;\r
+#if TRACE\r
+ Log("SPF=%.3g - SelfOverCount=%.3g = %.3g\n", SPF, SelfOverCount, Col);\r
+ Log("SPFGaps=%.3g - GapsSelfOverCount=%.3g = %.3g\n", SPFGaps, GapSelfOverCount, ColGaps);\r
+#endif\r
+ return Col + ColGaps;\r
+ }\r
+\r
+SCORE ObjScoreSPDimer(const MSA &msa)\r
+ {\r
+ static bool bGapScoreMatrixInit = false;\r
+ if (!bGapScoreMatrixInit)\r
+ InitGapScoreMatrix();\r
+\r
+ SCORE Total = 0;\r
+ const unsigned uSeqCount = msa.GetSeqCount();\r
+ const unsigned uColCount = msa.GetColCount();\r
+ for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)\r
+ {\r
+ SCORE Col = ObjScoreSPCol(msa, uColIndex);\r
+#if TRACE\r
+ {\r
+ SCORE ColCheck = SPColBrute(msa, uColIndex);\r
+ Log("FastCol=%.3g CheckCol=%.3g\n", Col, ColCheck);\r
+ }\r
+#endif\r
+ Total += Col;\r
+ }\r
+#if TRACE\r
+ Log("Total/2 = %.3g (final result from fast)\n", Total/2);\r
+#endif\r
+ return Total/2;\r
+ }\r