14 static char *GapTypeToStr(int GapType)
\r
18 case LL: return "LL";
\r
19 case LG: return "LG";
\r
20 case GL: return "GL";
\r
21 case GG: return "GG";
\r
23 Quit("Invalid gap type");
\r
27 static SCORE GapScoreMatrix[4][4];
\r
29 static void InitGapScoreMatrix()
\r
31 const SCORE t = (SCORE) 0.2;
\r
33 GapScoreMatrix[LL][LL] = 0;
\r
34 GapScoreMatrix[LL][LG] = g_scoreGapOpen;
\r
35 GapScoreMatrix[LL][GL] = 0;
\r
36 GapScoreMatrix[LL][GG] = 0;
\r
38 GapScoreMatrix[LG][LL] = g_scoreGapOpen;
\r
39 GapScoreMatrix[LG][LG] = 0;
\r
40 GapScoreMatrix[LG][GL] = g_scoreGapOpen;
\r
41 GapScoreMatrix[LG][GG] = t*g_scoreGapOpen; // approximation!
\r
43 GapScoreMatrix[GL][LL] = 0;
\r
44 GapScoreMatrix[GL][LG] = g_scoreGapOpen;
\r
45 GapScoreMatrix[GL][GL] = 0;
\r
46 GapScoreMatrix[GL][GG] = 0;
\r
48 GapScoreMatrix[GG][LL] = 0;
\r
49 GapScoreMatrix[GG][LG] = t*g_scoreGapOpen; // approximation!
\r
50 GapScoreMatrix[GG][GL] = 0;
\r
51 GapScoreMatrix[GG][GG] = 0;
\r
53 for (int i = 0; i < 4; ++i)
\r
54 for (int j = 0; j < i; ++j)
\r
55 if (GapScoreMatrix[i][j] != GapScoreMatrix[j][i])
\r
56 Quit("GapScoreMatrix not symmetrical");
\r
59 static SCORE SPColBrute(const MSA &msa, unsigned uColIndex)
\r
62 const unsigned uSeqCount = msa.GetSeqCount();
\r
63 for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1)
\r
65 const WEIGHT w1 = msa.GetSeqWeight(uSeqIndex1);
\r
66 unsigned uLetter1 = msa.GetLetterEx(uSeqIndex1, uColIndex);
\r
69 for (unsigned uSeqIndex2 = 0; uSeqIndex2 < uSeqIndex1; ++uSeqIndex2)
\r
71 const WEIGHT w2 = msa.GetSeqWeight(uSeqIndex2);
\r
72 unsigned uLetter2 = msa.GetLetterEx(uSeqIndex2, uColIndex);
\r
75 SCORE t = w1*w2*(*g_ptrScoreMatrix)[uLetter1][uLetter2];
\r
77 Log("Check %c %c w1=%.3g w2=%.3g Mx=%.3g t=%.3g\n",
\r
78 LetterToCharAmino(uLetter1),
\r
79 LetterToCharAmino(uLetter2),
\r
82 (*g_ptrScoreMatrix)[uLetter1][uLetter2],
\r
91 static SCORE SPGapFreqs(const FCOUNT Freqs[])
\r
95 for (unsigned i = 0; i < 4; ++i)
\r
97 Log(" %s=%.3g", GapTypeToStr(i), Freqs[i]);
\r
101 SCORE TotalOffDiag = 0;
\r
102 SCORE TotalDiag = 0;
\r
103 for (unsigned i = 0; i < 4; ++i)
\r
105 const FCOUNT fi = Freqs[i];
\r
108 const float *Row = GapScoreMatrix[i];
\r
109 SCORE diagt = fi*fi*Row[i];
\r
110 TotalDiag += diagt;
\r
112 Log("SPFGaps %s %s + Mx=%.3g TotalDiag += %.3g\n",
\r
119 for (unsigned j = 0; j < i; ++j)
\r
121 SCORE t = Freqs[j]*Row[j];
\r
124 Log("SPFGaps %s %s + Mx=%.3g Sum += %.3g\n",
\r
132 TotalOffDiag += fi*Sum;
\r
135 Log("SPFGap TotalOffDiag=%.3g + TotalDiag=%.3g = %.3g\n",
\r
136 TotalOffDiag, TotalDiag, TotalOffDiag + TotalDiag);
\r
138 return TotalOffDiag*2 + TotalDiag;
\r
141 static SCORE SPFreqs(const FCOUNT Freqs[])
\r
145 for (unsigned i = 0; i < 20; ++i)
\r
147 Log(" %c=%.3g", LetterToCharAmino(i), Freqs[i]);
\r
151 SCORE TotalOffDiag = 0;
\r
152 SCORE TotalDiag = 0;
\r
153 for (unsigned i = 0; i < 20; ++i)
\r
155 const FCOUNT fi = Freqs[i];
\r
158 const float *Row = (*g_ptrScoreMatrix)[i];
\r
159 SCORE diagt = fi*fi*Row[i];
\r
160 TotalDiag += diagt;
\r
162 Log("SPF %c %c + Mx=%.3g TotalDiag += %.3g\n",
\r
163 LetterToCharAmino(i),
\r
164 LetterToCharAmino(i),
\r
169 for (unsigned j = 0; j < i; ++j)
\r
171 SCORE t = Freqs[j]*Row[j];
\r
174 Log("SPF %c %c + Mx=%.3g Sum += %.3g\n",
\r
175 LetterToCharAmino(i),
\r
176 LetterToCharAmino(j),
\r
182 TotalOffDiag += fi*Sum;
\r
185 Log("SPF TotalOffDiag=%.3g + TotalDiag=%.3g = %.3g\n",
\r
186 TotalOffDiag, TotalDiag, TotalOffDiag + TotalDiag);
\r
188 return TotalOffDiag*2 + TotalDiag;
\r
191 static SCORE ObjScoreSPCol(const MSA &msa, unsigned uColIndex)
\r
194 FCOUNT GapFreqs[4];
\r
196 memset(Freqs, 0, sizeof(Freqs));
\r
197 memset(GapFreqs, 0, sizeof(GapFreqs));
\r
199 const unsigned uSeqCount = msa.GetSeqCount();
\r
202 for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
\r
203 Log(" %u=%.3g", uSeqIndex, msa.GetSeqWeight(uSeqIndex));
\r
206 SCORE SelfOverCount = 0;
\r
207 SCORE GapSelfOverCount = 0;
\r
208 for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
\r
210 WEIGHT w = msa.GetSeqWeight(uSeqIndex);
\r
212 bool bGapThisCol = msa.IsGap(uSeqIndex, uColIndex);
\r
213 bool bGapPrevCol = (uColIndex == 0 ? false : msa.IsGap(uSeqIndex, uColIndex - 1));
\r
214 int GapType = bGapThisCol + 2*bGapPrevCol;
\r
215 assert(GapType >= 0 && GapType < 4);
\r
216 GapFreqs[GapType] += w;
\r
217 SCORE gapt = w*w*GapScoreMatrix[GapType][GapType];
\r
218 GapSelfOverCount += gapt;
\r
222 unsigned uLetter = msa.GetLetterEx(uSeqIndex, uColIndex);
\r
225 Freqs[uLetter] += w;
\r
226 SCORE t = w*w*(*g_ptrScoreMatrix)[uLetter][uLetter];
\r
228 Log("FastCol compute freqs & SelfOverCount %c w=%.3g M=%.3g SelfOverCount += %.3g\n",
\r
229 LetterToCharAmino(uLetter), w, (*g_ptrScoreMatrix)[uLetter][uLetter], t);
\r
231 SelfOverCount += t;
\r
233 SCORE SPF = SPFreqs(Freqs);
\r
234 SCORE Col = SPF - SelfOverCount;
\r
236 SCORE SPFGaps = SPGapFreqs(GapFreqs);
\r
237 SCORE ColGaps = SPFGaps - GapSelfOverCount;
\r
239 Log("SPF=%.3g - SelfOverCount=%.3g = %.3g\n", SPF, SelfOverCount, Col);
\r
240 Log("SPFGaps=%.3g - GapsSelfOverCount=%.3g = %.3g\n", SPFGaps, GapSelfOverCount, ColGaps);
\r
242 return Col + ColGaps;
\r
245 SCORE ObjScoreSPDimer(const MSA &msa)
\r
247 static bool bGapScoreMatrixInit = false;
\r
248 if (!bGapScoreMatrixInit)
\r
249 InitGapScoreMatrix();
\r
252 const unsigned uSeqCount = msa.GetSeqCount();
\r
253 const unsigned uColCount = msa.GetColCount();
\r
254 for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
\r
256 SCORE Col = ObjScoreSPCol(msa, uColIndex);
\r
259 SCORE ColCheck = SPColBrute(msa, uColIndex);
\r
260 Log("FastCol=%.3g CheckCol=%.3g\n", Col, ColCheck);
\r
266 Log("Total/2 = %.3g (final result from fast)\n", Total/2);
\r