new version of muscle 3.8.31
[jabaws.git] / binaries / src / muscle / glbalignla.cpp
1 #include "muscle.h"\r
2 #include "profile.h"\r
3 #include "pwpath.h"\r
4 \r
5 #define OCC     1\r
6 \r
7 struct DP_MEMORY\r
8         {\r
9         unsigned uLength;\r
10         SCORE *GapOpenA;\r
11         SCORE *GapOpenB;\r
12         SCORE *GapCloseA;\r
13         SCORE *GapCloseB;\r
14         SCORE *MPrev;\r
15         SCORE *MCurr;\r
16         SCORE *MWork;\r
17         SCORE *DPrev;\r
18         SCORE *DCurr;\r
19         SCORE *DWork;\r
20         SCORE **ScoreMxB;\r
21 #if     OCC\r
22         FCOUNT *OccA;\r
23         FCOUNT *OccB;\r
24 #endif\r
25         unsigned **SortOrderA;\r
26         unsigned *uDeletePos;\r
27         FCOUNT **FreqsA;\r
28         int **TraceBack;\r
29         };\r
30 \r
31 static struct DP_MEMORY DPM;\r
32 \r
33 static void AllocDPMem(unsigned uLengthA, unsigned uLengthB)\r
34         {\r
35 // Max prefix length\r
36         unsigned uLength = (uLengthA > uLengthB ? uLengthA : uLengthB) + 1;\r
37         if (uLength < DPM.uLength)\r
38                 return;\r
39 \r
40 // Add 256 to allow for future expansion and\r
41 // round up to next multiple of 32.\r
42         uLength += 256;\r
43         uLength += 32 - uLength%32;\r
44 \r
45         const unsigned uOldLength = DPM.uLength;\r
46         if (uOldLength > 0)\r
47                 {\r
48                 for (unsigned i = 0; i < uOldLength; ++i)\r
49                         {\r
50                         delete[] DPM.TraceBack[i];\r
51                         delete[] DPM.FreqsA[i];\r
52                         delete[] DPM.SortOrderA[i];\r
53                         }\r
54                 for (unsigned n = 0; n < 20; ++n)\r
55                         delete[] DPM.ScoreMxB[n];\r
56 \r
57                 delete[] DPM.MPrev;\r
58                 delete[] DPM.MCurr;\r
59                 delete[] DPM.MWork;\r
60                 delete[] DPM.DPrev;\r
61                 delete[] DPM.DCurr;\r
62                 delete[] DPM.DWork;\r
63                 delete[] DPM.uDeletePos;\r
64                 delete[] DPM.GapOpenA;\r
65                 delete[] DPM.GapOpenB;\r
66                 delete[] DPM.GapCloseA;\r
67                 delete[] DPM.GapCloseB;\r
68                 delete[] DPM.SortOrderA;\r
69                 delete[] DPM.FreqsA;\r
70                 delete[] DPM.ScoreMxB;\r
71                 delete[] DPM.TraceBack;\r
72 #if     OCC\r
73                 delete[] DPM.OccA;\r
74                 delete[] DPM.OccB;\r
75 #endif\r
76                 }\r
77 \r
78         DPM.uLength = uLength;\r
79 \r
80         DPM.GapOpenA = new SCORE[uLength];\r
81         DPM.GapOpenB = new SCORE[uLength];\r
82         DPM.GapCloseA = new SCORE[uLength];\r
83         DPM.GapCloseB = new SCORE[uLength];\r
84 #if     OCC\r
85         DPM.OccA = new FCOUNT[uLength];\r
86         DPM.OccB = new FCOUNT[uLength];\r
87 #endif\r
88 \r
89         DPM.SortOrderA = new unsigned*[uLength];\r
90         DPM.FreqsA = new FCOUNT*[uLength];\r
91         DPM.ScoreMxB = new SCORE*[20];\r
92         DPM.MPrev = new SCORE[uLength];\r
93         DPM.MCurr = new SCORE[uLength];\r
94         DPM.MWork = new SCORE[uLength];\r
95 \r
96         DPM.DPrev = new SCORE[uLength];\r
97         DPM.DCurr = new SCORE[uLength];\r
98         DPM.DWork = new SCORE[uLength];\r
99         DPM.uDeletePos = new unsigned[uLength];\r
100 \r
101         DPM.TraceBack = new int*[uLength];\r
102 \r
103         for (unsigned uLetter = 0; uLetter < 20; ++uLetter)\r
104                 DPM.ScoreMxB[uLetter] = new SCORE[uLength];\r
105 \r
106         for (unsigned i = 0; i < uLength; ++i)\r
107                 {\r
108                 DPM.SortOrderA[i] = new unsigned[20];\r
109                 DPM.FreqsA[i] = new FCOUNT[20];\r
110                 DPM.TraceBack[i] = new int[uLength];\r
111                 }\r
112         }\r
113 \r
// GlobalAlignLA: aligns profile A against profile B by dynamic programming
// over a match (M), a delete (D) and an insert (I) state.
//
//   PA, uLengthA   Profile A and its number of positions.
//   PB, uLengthB   Profile B and its number of positions.
//   Path           Passed to TraceBackToPath together with the traceback
//                  matrix; presumably receives the resulting alignment path.
//   Returns        The largest of M, D + gap-close(A) and I + gap-close(B)
//                  evaluated for the final cell.
//
// Substitution score for A position i against B position j: the sum over
// letters of FreqsA[i][letter] * PB[j].m_AAScores[letter]. A sum that is
// exactly 0 gets the fixed score -2.5; otherwise the score is
// logf(sum) - g_scoreCenter, multiplied by both occupancies when OCC is set.
//
// Only two rows of M and D are kept (Prev/Curr, passed to Rotate after each
// row); I is a running scalar within a row. TraceBack[i][j] stays 0 when the
// best predecessor is the diagonal M cell, is positive (a delete length) or
// negative (an insert length) otherwise.
//
// NOTE(review): with uLengthA == 0 or uLengthB == 0 the [uLength-1] indexes
// near the end wrap around (unsigned); callers presumably never pass an
// empty profile -- confirm.
114 SCORE GlobalAlignLA(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,\r
115   unsigned uLengthB, PWPath &Path)\r
116         {\r
// Prefix counts are the profile lengths plus one; they bound the part of
// TraceBack that is cleared below.
117         const unsigned uPrefixCountA = uLengthA + 1;\r
118         const unsigned uPrefixCountB = uLengthB + 1;\r
119 \r
// Make sure the shared buffers in DPM are large enough for this call.
120         AllocDPMem(uLengthA, uLengthB);\r
121 \r
// Local aliases for the shared buffers.
122         SCORE *GapOpenA = DPM.GapOpenA;\r
123         SCORE *GapOpenB = DPM.GapOpenB;\r
124         SCORE *GapCloseA = DPM.GapCloseA;\r
125         SCORE *GapCloseB = DPM.GapCloseB;\r
126 \r
127         unsigned **SortOrderA = DPM.SortOrderA;\r
128         FCOUNT **FreqsA = DPM.FreqsA;\r
129         SCORE **ScoreMxB = DPM.ScoreMxB;\r
130         SCORE *MPrev = DPM.MPrev;\r
131         SCORE *MCurr = DPM.MCurr;\r
132         SCORE *MWork = DPM.MWork;\r
133 \r
134         SCORE *DPrev = DPM.DPrev;\r
135         SCORE *DCurr = DPM.DCurr;\r
136         SCORE *DWork = DPM.DWork;\r
137 \r
138 #if     OCC\r
139         FCOUNT *OccA = DPM.OccA;\r
140         FCOUNT *OccB = DPM.OccB;\r
141 #endif\r
142 \r
143         unsigned *uDeletePos = DPM.uDeletePos;\r
144 \r
145         int **TraceBack = DPM.TraceBack;\r
146 \r
// Copy profile A's per-position gap scores, occupancy, letter sort order
// and letter counts into the flat work arrays.
147         for (unsigned i = 0; i < uLengthA; ++i)\r
148                 {\r
149                 GapOpenA[i] = PA[i].m_scoreGapOpen;\r
150                 GapCloseA[i] = PA[i].m_scoreGapClose;\r
151 #if     OCC\r
152                 OccA[i] = PA[i].m_fOcc;\r
153 #endif\r
154 \r
155                 for (unsigned uLetter = 0; uLetter < 20; ++uLetter)\r
156                         {\r
157                         SortOrderA[i][uLetter] = PA[i].m_uSortOrder[uLetter];\r
158                         FreqsA[i][uLetter] = PA[i].m_fcCounts[uLetter];\r
159                         }\r
160                 }\r
161 \r
// Copy profile B's per-position gap scores and occupancy.
162         for (unsigned j = 0; j < uLengthB; ++j)\r
163                 {\r
164                 GapOpenB[j] = PB[j].m_scoreGapOpen;\r
165                 GapCloseB[j] = PB[j].m_scoreGapClose;\r
166 #if     OCC\r
167                 OccB[j] = PB[j].m_fOcc;\r
168 #endif\r
169                 }\r
170 \r
// Store B's letter scores letter-major: ScoreMxB[letter][j], so that one
// letter's scores for all j are contiguous.
171         for (unsigned uLetter = 0; uLetter < 20; ++uLetter)\r
172                 {\r
173                 for (unsigned j = 0; j < uLengthB; ++j)\r
174                         ScoreMxB[uLetter][j] = PB[j].m_AAScores[uLetter];\r
175                 }\r
176 \r
// Clear the part of the traceback matrix used by this call. Cells that are
// not overwritten later keep the value 0.
177         for (unsigned i = 0; i < uPrefixCountA; ++i)\r
178                 memset(TraceBack[i], 0, uPrefixCountB*sizeof(int));\r
179 \r
180 // Special case for i=0\r
181         unsigned **ptrSortOrderA = SortOrderA;\r
182         FCOUNT **ptrFreqsA = FreqsA;\r
183         assert(ptrSortOrderA == &(SortOrderA[0]));\r
184         assert(ptrFreqsA == &(FreqsA[0]));\r
185         TraceBack[0][0] = 0;\r
186 \r
// Raw substitution sum for A[0] against B[0]. Letters are visited in A's
// sort order and the scan stops at the first zero count (assumes the sort
// order lists letters with non-zero counts first -- TODO confirm).
187         SCORE scoreSum = 0;\r
188         unsigned *ptrSortOrderAi = SortOrderA[0];\r
189         const unsigned *ptrSortOrderAEnd = ptrSortOrderAi + 20;\r
190         FCOUNT *ptrFreqsAi = FreqsA[0];\r
191         for (; ptrSortOrderAi != ptrSortOrderAEnd; ++ptrSortOrderAi)\r
192                 {\r
193                 const unsigned uLetter = *ptrSortOrderAi;\r
194                 const FCOUNT fcLetter = ptrFreqsAi[uLetter];\r
195                 if (0 == fcLetter)\r
196                         break;\r
197                 scoreSum += fcLetter*ScoreMxB[uLetter][0];\r
198                 }\r
// logf is not applied to a zero sum; the fixed score -2.5 is used instead.
199         if (0 == scoreSum)\r
200                 MPrev[0] = -2.5;\r
201         else\r
202                 {\r
203 #if     OCC\r
204                 MPrev[0] = (logf(scoreSum) - g_scoreCenter)*OccA[0]*OccB[0];\r
205 #else\r
206                 MPrev[0] = (logf(scoreSum) - g_scoreCenter);\r
207 #endif\r
208                 }\r
209 \r
210 // D(0,0) is -infinity (requires I->D).\r
211         DPrev[0] = MINUS_INFINITY;\r
212 \r
// Remainder of row i=0: M(0, j) for j = 1 .. uLengthB-1.
213         for (unsigned j = 1; j < uLengthB; ++j)\r
214                 {\r
215         // Only way to get M(0, j) looks like this:\r
216         //              A       ----X\r
217         //              B       XXXXX\r
218         //                      0   j\r
219         // So gap-open at j=0, gap-close at j-1.\r
220                 SCORE scoreSum = 0;\r
221                 unsigned *ptrSortOrderAi = SortOrderA[0];\r
222                 const unsigned *ptrSortOrderAEnd = ptrSortOrderAi + 20;\r
223                 FCOUNT *ptrFreqsAi = FreqsA[0];\r
224                 for (; ptrSortOrderAi != ptrSortOrderAEnd; ++ptrSortOrderAi)\r
225                         {\r
226                         const unsigned uLetter = *ptrSortOrderAi;\r
227                         const FCOUNT fcLetter = ptrFreqsAi[uLetter];\r
228                         if (0 == fcLetter)\r
229                                 break;\r
230                         scoreSum += fcLetter*ScoreMxB[uLetter][j];\r
231                         }\r
// NOTE(review): when the sum is 0 the gap-open/gap-close terms are not
// added to the -2.5 here, whereas the j=0 case in the main loop below adds
// its gap terms unconditionally -- confirm the asymmetry is intended.
232                 if (0 == scoreSum)\r
233                         MPrev[j] = -2.5;\r
234                 else\r
235                         {\r
236 #if     OCC\r
237                         MPrev[j] = (logf(scoreSum) - g_scoreCenter)*OccA[0]*OccB[j] +\r
238                           GapOpenB[0] + GapCloseB[j-1];\r
239 #else\r
240                         MPrev[j] = (logf(scoreSum) - g_scoreCenter) +\r
241                           GapOpenB[0] + GapCloseB[j-1];\r
242 #endif\r
243                         }\r
// Negative traceback value: an insert of length j leads to this cell.
244                 TraceBack[0][j] = -(int) j;\r
245 \r
246         // Assume no D->I transitions, then can't be a delete if only\r
247         // one letter from A.\r
248                 DPrev[j] = MINUS_INFINITY;\r
249                 }\r
250 \r
// Main recursion over rows i = 1 .. uLengthA-1. IPrev_j_1 is the running
// best insert score within the current row.
251         SCORE IPrev_j_1;\r
252         for (unsigned i = 1; i < uLengthA; ++i)\r
253                 {\r
254                 ++ptrSortOrderA;\r
255                 ++ptrFreqsA;\r
256                 assert(ptrSortOrderA == &(SortOrderA[i]));\r
257                 assert(ptrFreqsA == &(FreqsA[i]));\r
258 \r
// Accumulate the raw substitution sums for the whole row in MCurr:
// MCurr[j] = sum over letters of FreqsA[i][letter] * ScoreMxB[letter][j].
// The letter loop is outermost so each pass walks one contiguous row of
// ScoreMxB.
259                 SCORE *ptrMCurr_j = MCurr;\r
260                 memset(ptrMCurr_j, 0, uLengthB*sizeof(SCORE));\r
261                 const FCOUNT *FreqsAi = *ptrFreqsA;\r
262 \r
263                 const unsigned *SortOrderAi = *ptrSortOrderA;\r
264                 const unsigned *ptrSortOrderAiEnd = SortOrderAi + 20;\r
265                 const SCORE *ptrMCurrMax = MCurr + uLengthB;\r
266                 for (const unsigned *ptrSortOrderAi = SortOrderAi;\r
267                   ptrSortOrderAi != ptrSortOrderAiEnd;\r
268                   ++ptrSortOrderAi)\r
269                         {\r
270                         const unsigned uLetter = *ptrSortOrderAi;\r
271                         SCORE *NSBR_Letter = ScoreMxB[uLetter];\r
272                         const FCOUNT fcLetter = FreqsAi[uLetter];\r
273                         if (0 == fcLetter)\r
274                                 break;\r
275                         SCORE *ptrNSBR = NSBR_Letter;\r
276                         for (SCORE *ptrMCurr = MCurr; ptrMCurr != ptrMCurrMax; ++ptrMCurr)\r
277                                 *ptrMCurr += fcLetter*(*ptrNSBR++);\r
278                         }\r
279 \r
// Convert the raw sums to substitution scores in place, using the same
// rule as for row 0 (zero sum -> -2.5, otherwise centred log).
280 #if     OCC\r
281                 const FCOUNT OccAi = OccA[i];\r
282 #endif\r
283                 for (unsigned j = 0; j < uLengthB; ++j)\r
284                         {\r
285                         if (MCurr[j] == 0)\r
286                                 MCurr[j] = -2.5;\r
287                         else\r
288 #if     OCC\r
289                                 MCurr[j] = (logf(MCurr[j]) - g_scoreCenter)*OccAi*OccB[j];\r
290 #else\r
291                                 MCurr[j] = (logf(MCurr[j]) - g_scoreCenter);\r
292 #endif\r
293                         }\r
294 \r
295                 ptrMCurr_j = MCurr;\r
296                 unsigned *ptrDeletePos = uDeletePos;\r
297 \r
298         // Special case for j=0\r
299         // Only way to get M(i, 0) looks like this:\r
300         //                      0   i\r
301         //              A       XXXXX\r
302         //              B       ----X\r
303         // So gap-open at i=0, gap-close at i-1.\r
304                 assert(ptrMCurr_j == &(MCurr[0]));\r
305                 *ptrMCurr_j += GapOpenA[0] + GapCloseA[i-1];\r
306 \r
307                 ++ptrMCurr_j;\r
308 \r
// Positive traceback value: a delete of length i leads to cell (i, 0).
309                 int *ptrTraceBack_ij = TraceBack[i];\r
310                 *ptrTraceBack_ij++ = (int) i;\r
311 \r
// D for column 0: keep the previous row's D or open a new delete from
// M(i-1, 0), whichever is larger; record i as the opening row if opened.
312                 SCORE *ptrMPrev_j = MPrev;\r
313                 SCORE *ptrDPrev = DPrev;\r
314                 SCORE d = *ptrDPrev;\r
315                 SCORE DNew = *ptrMPrev_j + GapOpenA[i];\r
316                 if (DNew > d)\r
317                         {\r
318                         d = DNew;\r
319                         *ptrDeletePos = i;\r
320                         }\r
321 \r
322                 SCORE *ptrDCurr = DCurr;\r
323 \r
324                 assert(ptrDCurr == &(DCurr[0]));\r
325                 *ptrDCurr = d;\r
326 \r
327         // Can't have an insert if no letters from B\r
328                 IPrev_j_1 = MINUS_INFINITY;\r
329 \r
// NOTE(review): uInsertPos is assigned only when an insert is opened in the
// loop below and is read only in the scoreI branch; it has no initial value.
330                 unsigned uInsertPos;\r
331                 const SCORE scoreGapOpenAi = GapOpenA[i];\r
332                 const SCORE scoreGapCloseAi_1 = GapCloseA[i-1];\r
333 \r
334                 for (unsigned j = 1; j < uLengthB; ++j)\r
335                         {\r
336                 // Here, MPrev_j is preserved from previous\r
337                 // iteration so with current i,j is M[i-1][j-1]\r
338                         SCORE MPrev_j = *ptrMPrev_j;\r
// Extend the running insert or open a new one at column j.
339                         SCORE INew = MPrev_j + GapOpenB[j];\r
340                         if (INew > IPrev_j_1)\r
341                                 {\r
342                                 IPrev_j_1 = INew;\r
343                                 uInsertPos = j;\r
344                                 }\r
345 \r
// Pick the best predecessor of M(i, j): the diagonal M cell (traceback
// stays 0), a closing delete (positive length) or a closing insert
// (negative length).
346                         SCORE scoreMax = MPrev_j;\r
347 \r
348                         assert(ptrDPrev == &(DPrev[j-1]));\r
349                         SCORE scoreD = *ptrDPrev++ + scoreGapCloseAi_1;\r
350                         if (scoreD > scoreMax)\r
351                                 {\r
352                                 scoreMax = scoreD;\r
353                                 assert(ptrDeletePos == &(uDeletePos[j-1]));\r
354                                 *ptrTraceBack_ij = (int) i - (int) *ptrDeletePos;\r
355                                 assert(*ptrTraceBack_ij > 0);\r
356                                 }\r
357                         ++ptrDeletePos;\r
358 \r
359                         SCORE scoreI = IPrev_j_1 + GapCloseB[j-1];\r
360                         if (scoreI > scoreMax)\r
361                                 {\r
362                                 scoreMax = scoreI;\r
363                                 *ptrTraceBack_ij = (int) uInsertPos - (int) j;\r
364                                 assert(*ptrTraceBack_ij < 0);\r
365                                 }\r
366 \r
367                         assert(ptrSortOrderA == &(SortOrderA[i]));\r
368                         assert(ptrFreqsA == &(FreqsA[i]));\r
369 \r
// MCurr[j] already holds the substitution score; add the best predecessor.
370                         *ptrMCurr_j += scoreMax;\r
371                         assert(ptrMCurr_j == &(MCurr[j]));\r
372                         ++ptrMCurr_j;\r
373 \r
// D for column j: keep DPrev[j] or open a new delete from M(i-1, j),
// whichever is larger; record i as the opening row if opened.
374                         MPrev_j = *(++ptrMPrev_j);\r
375                         assert(ptrDPrev == &(DPrev[j]));\r
376                         SCORE d = *ptrDPrev;\r
377                         SCORE DNew = MPrev_j + scoreGapOpenAi;\r
378                         if (DNew > d)\r
379                                 {\r
380                                 d = DNew;\r
381                                 assert(ptrDeletePos == &uDeletePos[j]);\r
382                                 *ptrDeletePos = i;\r
383                                 }\r
384                         assert(ptrDCurr + 1 == &(DCurr[j]));\r
385                         *(++ptrDCurr) = d;\r
386 \r
387                         ++ptrTraceBack_ij;\r
388                         }\r
389 \r
// Hand the three row buffers to Rotate; presumably the row just computed
// becomes Prev for the next iteration -- confirm against Rotate.
390                 Rotate(MPrev, MCurr, MWork);\r
391                 Rotate(DPrev, DCurr, DWork);\r
392                 }\r
393 \r
394 // Special case for i=uLengthA\r
395         SCORE IPrev = MINUS_INFINITY;\r
396 \r
// NOTE(review): like its namesake inside the main loop, this uInsertPos is
// only assigned when some INew exceeds IPrev and is read only in the scoreI
// branch below.
397         unsigned uInsertPos;\r
398 \r
// Best insert that can still be closed after the last row: scan MPrev for
// the best place to open it.
399         for (unsigned j = 1; j < uLengthB; ++j)\r
400                 {\r
401                 SCORE INew = MPrev[j-1] + GapOpenB[j];\r
402                 if (INew > IPrev)\r
403                         {\r
404                         uInsertPos = j;\r
405                         IPrev = INew;\r
406                         }\r
407                 }\r
408 \r
409 // Special case for i=uLengthA, j=uLengthB\r
// Final score: best of ending in M, closing a delete, or closing an insert.
// iTraceBack uses the same sign convention as the other traceback cells.
410         SCORE scoreMax = MPrev[uLengthB-1];\r
411         int iTraceBack = 0;\r
412 \r
413         SCORE scoreD = DPrev[uLengthB-1] + GapCloseA[uLengthA-1];\r
414         if (scoreD > scoreMax)\r
415                 {\r
416                 scoreMax = scoreD;\r
417                 iTraceBack = (int) uLengthA - (int) uDeletePos[uLengthB-1];\r
418                 }\r
419 \r
420         SCORE scoreI = IPrev + GapCloseB[uLengthB-1];\r
421         if (scoreI > scoreMax)\r
422                 {\r
423                 scoreMax = scoreI;\r
424                 iTraceBack = (int) uInsertPos - (int) uLengthB;\r
425                 }\r
426 \r
427         TraceBack[uLengthA][uLengthB] = iTraceBack;\r
428 \r
// Hand the completed traceback matrix and Path to TraceBackToPath.
429         TraceBackToPath(TraceBack, uLengthA, uLengthB, Path);\r
430 \r
431         return scoreMax;\r
432         }\r