62c0f1bd71b18394f8b72897e6c2bb44c2c1c35e
[jabaws.git] / binaries / src / muscle / spfast.cpp
1 #include "muscle.h"\r
2 #include "profile.h"\r
3 \r
4 #define TRACE   0\r
5 \r
6 enum\r
7         {\r
8         LL = 0,\r
9         LG = 1,\r
10         GL = 2,\r
11         GG = 3,\r
12         };\r
13 \r
14 static char *GapTypeToStr(int GapType)\r
15         {\r
16         switch (GapType)\r
17                 {\r
18         case LL: return "LL";\r
19         case LG: return "LG";\r
20         case GL: return "GL";\r
21         case GG: return "GG";\r
22                 }\r
23         Quit("Invalid gap type");\r
24         return "?";\r
25         }\r
26 \r
27 static SCORE GapScoreMatrix[4][4];\r
28 \r
29 static void InitGapScoreMatrix()\r
30         {\r
31         const SCORE t = (SCORE) 0.2;\r
32 \r
33         GapScoreMatrix[LL][LL] = 0;\r
34         GapScoreMatrix[LL][LG] = g_scoreGapOpen;\r
35         GapScoreMatrix[LL][GL] = 0;\r
36         GapScoreMatrix[LL][GG] = 0;\r
37 \r
38         GapScoreMatrix[LG][LL] = g_scoreGapOpen;\r
39         GapScoreMatrix[LG][LG] = 0;\r
40         GapScoreMatrix[LG][GL] = g_scoreGapOpen;\r
41         GapScoreMatrix[LG][GG] = t*g_scoreGapOpen;      // approximation!\r
42 \r
43         GapScoreMatrix[GL][LL] = 0;\r
44         GapScoreMatrix[GL][LG] = g_scoreGapOpen;\r
45         GapScoreMatrix[GL][GL] = 0;\r
46         GapScoreMatrix[GL][GG] = 0;\r
47 \r
48         GapScoreMatrix[GG][LL] = 0;\r
49         GapScoreMatrix[GG][LG] = t*g_scoreGapOpen;      // approximation!\r
50         GapScoreMatrix[GG][GL] = 0;\r
51         GapScoreMatrix[GG][GG] = 0;\r
52 \r
53         for (int i = 0; i < 4; ++i)\r
54                 for (int j = 0; j < i; ++j)\r
55                         if (GapScoreMatrix[i][j] != GapScoreMatrix[j][i])\r
56                                 Quit("GapScoreMatrix not symmetrical");\r
57         }\r
58 \r
59 static SCORE SPColBrute(const MSA &msa, unsigned uColIndex)\r
60         {\r
61         SCORE Sum = 0;\r
62         const unsigned uSeqCount = msa.GetSeqCount();\r
63         for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1)\r
64                 {\r
65                 const WEIGHT w1 = msa.GetSeqWeight(uSeqIndex1);\r
66                 unsigned uLetter1 = msa.GetLetterEx(uSeqIndex1, uColIndex);\r
67                 if (uLetter1 >= 20)\r
68                         continue;\r
69                 for (unsigned uSeqIndex2 = 0; uSeqIndex2 < uSeqIndex1; ++uSeqIndex2)\r
70                         {\r
71                         const WEIGHT w2 = msa.GetSeqWeight(uSeqIndex2);\r
72                         unsigned uLetter2 = msa.GetLetterEx(uSeqIndex2, uColIndex);\r
73                         if (uLetter2 >= 20)\r
74                                 continue;\r
75                         SCORE t = w1*w2*(*g_ptrScoreMatrix)[uLetter1][uLetter2];\r
76 #if     TRACE\r
77                         Log("Check %c %c w1=%.3g w2=%.3g Mx=%.3g t=%.3g\n",\r
78                           LetterToCharAmino(uLetter1),\r
79                           LetterToCharAmino(uLetter2),\r
80                           w1,\r
81                           w2,\r
82                           (*g_ptrScoreMatrix)[uLetter1][uLetter2],\r
83                           t);\r
84 #endif\r
85                         Sum += t;\r
86                         }\r
87                 }\r
88         return Sum;\r
89         }\r
90 \r
91 static SCORE SPGapFreqs(const FCOUNT Freqs[])\r
92         {\r
93 #if TRACE\r
94         Log("Freqs=");\r
95         for (unsigned i = 0; i < 4; ++i)\r
96                 if (Freqs[i] != 0)\r
97                         Log(" %s=%.3g", GapTypeToStr(i), Freqs[i]);\r
98         Log("\n");\r
99 #endif\r
100 \r
101         SCORE TotalOffDiag = 0;\r
102         SCORE TotalDiag = 0;\r
103         for (unsigned i = 0; i < 4; ++i)\r
104                 {\r
105                 const FCOUNT fi = Freqs[i];\r
106                 if (0 == fi)\r
107                         continue;\r
108                 const float *Row = GapScoreMatrix[i];\r
109                 SCORE diagt = fi*fi*Row[i];\r
110                 TotalDiag += diagt;\r
111 #if     TRACE\r
112                 Log("SPFGaps %s %s + Mx=%.3g TotalDiag += %.3g\n",\r
113                   GapTypeToStr(i),\r
114                   GapTypeToStr(i),\r
115                   Row[i],\r
116                   diagt);\r
117 #endif\r
118                 SCORE Sum = 0;\r
119                 for (unsigned j = 0; j < i; ++j)\r
120                         {\r
121                         SCORE t = Freqs[j]*Row[j];\r
122 #if     TRACE\r
123                         if (Freqs[j] != 0)\r
124                                 Log("SPFGaps %s %s + Mx=%.3g Sum += %.3g\n",\r
125                                   GapTypeToStr(i),\r
126                                   GapTypeToStr(j),\r
127                                   Row[j],\r
128                                   fi*t);\r
129 #endif\r
130                         Sum += t;\r
131                         }\r
132                 TotalOffDiag += fi*Sum;\r
133                 }\r
134 #if TRACE\r
135         Log("SPFGap TotalOffDiag=%.3g + TotalDiag=%.3g = %.3g\n",\r
136           TotalOffDiag, TotalDiag, TotalOffDiag + TotalDiag);\r
137 #endif\r
138         return TotalOffDiag*2 + TotalDiag;\r
139         }\r
140 \r
141 static SCORE SPFreqs(const FCOUNT Freqs[])\r
142         {\r
143 #if TRACE\r
144         Log("Freqs=");\r
145         for (unsigned i = 0; i < 20; ++i)\r
146                 if (Freqs[i] != 0)\r
147                         Log(" %c=%.3g", LetterToCharAmino(i), Freqs[i]);\r
148         Log("\n");\r
149 #endif\r
150 \r
151         SCORE TotalOffDiag = 0;\r
152         SCORE TotalDiag = 0;\r
153         for (unsigned i = 0; i < 20; ++i)\r
154                 {\r
155                 const FCOUNT fi = Freqs[i];\r
156                 if (0 == fi)\r
157                         continue;\r
158                 const float *Row = (*g_ptrScoreMatrix)[i];\r
159                 SCORE diagt = fi*fi*Row[i];\r
160                 TotalDiag += diagt;\r
161 #if     TRACE\r
162                 Log("SPF %c %c + Mx=%.3g TotalDiag += %.3g\n",\r
163                   LetterToCharAmino(i),\r
164                   LetterToCharAmino(i),\r
165                   Row[i],\r
166                   diagt);\r
167 #endif\r
168                 SCORE Sum = 0;\r
169                 for (unsigned j = 0; j < i; ++j)\r
170                         {\r
171                         SCORE t = Freqs[j]*Row[j];\r
172 #if     TRACE\r
173                         if (Freqs[j] != 0)\r
174                                 Log("SPF %c %c + Mx=%.3g Sum += %.3g\n",\r
175                                   LetterToCharAmino(i),\r
176                                   LetterToCharAmino(j),\r
177                                   Row[j],\r
178                                   fi*t);\r
179 #endif\r
180                         Sum += t;\r
181                         }\r
182                 TotalOffDiag += fi*Sum;\r
183                 }\r
184 #if TRACE\r
185         Log("SPF TotalOffDiag=%.3g + TotalDiag=%.3g = %.3g\n",\r
186           TotalOffDiag, TotalDiag, TotalOffDiag + TotalDiag);\r
187 #endif\r
188         return TotalOffDiag*2 + TotalDiag;\r
189         }\r
190 \r
191 static SCORE ObjScoreSPCol(const MSA &msa, unsigned uColIndex)\r
192         {\r
193         FCOUNT Freqs[20];\r
194         FCOUNT GapFreqs[4];\r
195 \r
196         memset(Freqs, 0, sizeof(Freqs));\r
197         memset(GapFreqs, 0, sizeof(GapFreqs));\r
198 \r
199         const unsigned uSeqCount = msa.GetSeqCount();\r
200 #if     TRACE\r
201         Log("Weights=");\r
202         for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
203                 Log(" %u=%.3g", uSeqIndex, msa.GetSeqWeight(uSeqIndex));\r
204         Log("\n");\r
205 #endif\r
206         SCORE SelfOverCount = 0;\r
207         SCORE GapSelfOverCount = 0;\r
208         for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
209                 {\r
210                 WEIGHT w = msa.GetSeqWeight(uSeqIndex);\r
211 \r
212                 bool bGapThisCol = msa.IsGap(uSeqIndex, uColIndex);\r
213                 bool bGapPrevCol = (uColIndex == 0 ? false : msa.IsGap(uSeqIndex, uColIndex - 1));\r
214                 int GapType = bGapThisCol + 2*bGapPrevCol;\r
215                 assert(GapType >= 0 && GapType < 4);\r
216                 GapFreqs[GapType] += w;\r
217                 SCORE gapt = w*w*GapScoreMatrix[GapType][GapType];\r
218                 GapSelfOverCount += gapt;\r
219 \r
220                 if (bGapThisCol)\r
221                         continue;\r
222                 unsigned uLetter = msa.GetLetterEx(uSeqIndex, uColIndex);\r
223                 if (uLetter >= 20)\r
224                         continue;\r
225                 Freqs[uLetter] += w;\r
226                 SCORE t = w*w*(*g_ptrScoreMatrix)[uLetter][uLetter];\r
227 #if     TRACE\r
228                 Log("FastCol compute freqs & SelfOverCount %c w=%.3g M=%.3g SelfOverCount += %.3g\n",\r
229                   LetterToCharAmino(uLetter), w, (*g_ptrScoreMatrix)[uLetter][uLetter], t);\r
230 #endif\r
231                 SelfOverCount += t;\r
232                 }\r
233         SCORE SPF = SPFreqs(Freqs);\r
234         SCORE Col = SPF - SelfOverCount;\r
235 \r
236         SCORE SPFGaps = SPGapFreqs(GapFreqs);\r
237         SCORE ColGaps = SPFGaps - GapSelfOverCount;\r
238 #if     TRACE\r
239         Log("SPF=%.3g - SelfOverCount=%.3g = %.3g\n", SPF, SelfOverCount, Col);\r
240         Log("SPFGaps=%.3g - GapsSelfOverCount=%.3g = %.3g\n", SPFGaps, GapSelfOverCount, ColGaps);\r
241 #endif\r
242         return Col + ColGaps;\r
243         }\r
244 \r
245 SCORE ObjScoreSPDimer(const MSA &msa)\r
246         {\r
247         static bool bGapScoreMatrixInit = false;\r
248         if (!bGapScoreMatrixInit)\r
249                 InitGapScoreMatrix();\r
250 \r
251         SCORE Total = 0;\r
252         const unsigned uSeqCount = msa.GetSeqCount();\r
253         const unsigned uColCount = msa.GetColCount();\r
254         for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)\r
255                 {\r
256                 SCORE Col = ObjScoreSPCol(msa, uColIndex);\r
257 #if     TRACE\r
258                 {\r
259                 SCORE ColCheck = SPColBrute(msa, uColIndex);\r
260                 Log("FastCol=%.3g CheckCol=%.3g\n", Col, ColCheck);\r
261                 }\r
262 #endif\r
263                 Total += Col;\r
264                 }\r
265 #if TRACE\r
266         Log("Total/2 = %.3g (final result from fast)\n", Total/2);\r
267 #endif\r
268         return Total/2;\r
269         }\r