Next version of JABA
[jabaws.git] / binaries / src / muscle / glbalignspn.cpp
1 #include "muscle.h"\r
2 #include "profile.h"\r
3 #include "pwpath.h"\r
4 \r
5 struct DP_MEMORY\r
6         {\r
7         unsigned uLength;\r
8         SCORE *GapOpenA;\r
9         SCORE *GapOpenB;\r
10         SCORE *GapCloseA;\r
11         SCORE *GapCloseB;\r
12         SCORE *MPrev;\r
13         SCORE *MCurr;\r
14         SCORE *MWork;\r
15         SCORE *DPrev;\r
16         SCORE *DCurr;\r
17         SCORE *DWork;\r
18         SCORE **ScoreMxB;\r
19         unsigned **SortOrderA;\r
20         unsigned *uDeletePos;\r
21         FCOUNT **FreqsA;\r
22         int **TraceBack;\r
23         };\r
24 \r
25 static struct DP_MEMORY DPM;\r
26 \r
27 void FreeDPMemSPN()\r
28         {\r
29         const unsigned uOldLength = DPM.uLength;\r
30         if (0 == uOldLength)\r
31                 return;\r
32 \r
33         for (unsigned i = 0; i < uOldLength; ++i)\r
34                 {\r
35                 delete[] DPM.TraceBack[i];\r
36                 delete[] DPM.FreqsA[i];\r
37                 delete[] DPM.SortOrderA[i];\r
38                 }\r
39         for (unsigned n = 0; n < 4; ++n)\r
40                 delete[] DPM.ScoreMxB[n];\r
41 \r
42         delete[] DPM.MPrev;\r
43         delete[] DPM.MCurr;\r
44         delete[] DPM.MWork;\r
45         delete[] DPM.DPrev;\r
46         delete[] DPM.DCurr;\r
47         delete[] DPM.DWork;\r
48         delete[] DPM.uDeletePos;\r
49         delete[] DPM.GapOpenA;\r
50         delete[] DPM.GapOpenB;\r
51         delete[] DPM.GapCloseA;\r
52         delete[] DPM.GapCloseB;\r
53         delete[] DPM.SortOrderA;\r
54         delete[] DPM.FreqsA;\r
55         delete[] DPM.ScoreMxB;\r
56         delete[] DPM.TraceBack;\r
57         }\r
58 \r
59 static void AllocDPMem(unsigned uLengthA, unsigned uLengthB)\r
60         {\r
61 // Max prefix length\r
62         unsigned uLength = (uLengthA > uLengthB ? uLengthA : uLengthB) + 1;\r
63         if (uLength < DPM.uLength)\r
64                 return;\r
65 \r
66 // Add 256 to allow for future expansion and\r
67 // round up to next multiple of 32.\r
68         uLength += 256;\r
69         uLength += 32 - uLength%32;\r
70 \r
71         const unsigned uOldLength = DPM.uLength;\r
72         if (uOldLength > 0)\r
73                 {\r
74                 for (unsigned i = 0; i < uOldLength; ++i)\r
75                         {\r
76                         delete[] DPM.TraceBack[i];\r
77                         delete[] DPM.FreqsA[i];\r
78                         delete[] DPM.SortOrderA[i];\r
79                         }\r
80                 for (unsigned n = 0; n < 4; ++n)\r
81                         delete[] DPM.ScoreMxB[n];\r
82 \r
83                 delete[] DPM.MPrev;\r
84                 delete[] DPM.MCurr;\r
85                 delete[] DPM.MWork;\r
86                 delete[] DPM.DPrev;\r
87                 delete[] DPM.DCurr;\r
88                 delete[] DPM.DWork;\r
89                 delete[] DPM.uDeletePos;\r
90                 delete[] DPM.GapOpenA;\r
91                 delete[] DPM.GapOpenB;\r
92                 delete[] DPM.GapCloseA;\r
93                 delete[] DPM.GapCloseB;\r
94                 delete[] DPM.SortOrderA;\r
95                 delete[] DPM.FreqsA;\r
96                 delete[] DPM.ScoreMxB;\r
97                 delete[] DPM.TraceBack;\r
98                 }\r
99 \r
100         DPM.uLength = uLength;\r
101 \r
102         DPM.GapOpenA = new SCORE[uLength];\r
103         DPM.GapOpenB = new SCORE[uLength];\r
104         DPM.GapCloseA = new SCORE[uLength];\r
105         DPM.GapCloseB = new SCORE[uLength];\r
106 \r
107         DPM.SortOrderA = new unsigned*[uLength];\r
108         DPM.FreqsA = new FCOUNT*[uLength];\r
109         DPM.ScoreMxB = new SCORE*[4];\r
110         DPM.MPrev = new SCORE[uLength];\r
111         DPM.MCurr = new SCORE[uLength];\r
112         DPM.MWork = new SCORE[uLength];\r
113 \r
114         DPM.DPrev = new SCORE[uLength];\r
115         DPM.DCurr = new SCORE[uLength];\r
116         DPM.DWork = new SCORE[uLength];\r
117         DPM.uDeletePos = new unsigned[uLength];\r
118 \r
119         DPM.TraceBack = new int*[uLength];\r
120 \r
121         for (unsigned uLetter = 0; uLetter < 4; ++uLetter)\r
122                 DPM.ScoreMxB[uLetter] = new SCORE[uLength];\r
123 \r
124         for (unsigned i = 0; i < uLength; ++i)\r
125                 {\r
126                 DPM.SortOrderA[i] = new unsigned[4];\r
127                 DPM.FreqsA[i] = new FCOUNT[4];\r
128                 DPM.TraceBack[i] = new int[uLength];\r
129                 }\r
130         }\r
131 \r
132 SCORE GlobalAlignSPN(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,\r
133   unsigned uLengthB, PWPath &Path)\r
134         {\r
135         if (ALPHA_DNA != g_Alpha || ALPHA_RNA == g_Alpha)\r
136                 Quit("GlobalAlignSPN: must be nucleo");\r
137 \r
138         const unsigned uPrefixCountA = uLengthA + 1;\r
139         const unsigned uPrefixCountB = uLengthB + 1;\r
140 \r
141         AllocDPMem(uLengthA, uLengthB);\r
142 \r
143         SCORE *GapOpenA = DPM.GapOpenA;\r
144         SCORE *GapOpenB = DPM.GapOpenB;\r
145         SCORE *GapCloseA = DPM.GapCloseA;\r
146         SCORE *GapCloseB = DPM.GapCloseB;\r
147 \r
148         unsigned **SortOrderA = DPM.SortOrderA;\r
149         FCOUNT **FreqsA = DPM.FreqsA;\r
150         SCORE **ScoreMxB = DPM.ScoreMxB;\r
151         SCORE *MPrev = DPM.MPrev;\r
152         SCORE *MCurr = DPM.MCurr;\r
153         SCORE *MWork = DPM.MWork;\r
154 \r
155         SCORE *DPrev = DPM.DPrev;\r
156         SCORE *DCurr = DPM.DCurr;\r
157         SCORE *DWork = DPM.DWork;\r
158         unsigned *uDeletePos = DPM.uDeletePos;\r
159 \r
160         int **TraceBack = DPM.TraceBack;\r
161 \r
162         for (unsigned i = 0; i < uLengthA; ++i)\r
163                 {\r
164                 GapOpenA[i] = PA[i].m_scoreGapOpen;\r
165                 GapCloseA[i] = PA[i].m_scoreGapClose;\r
166 \r
167                 for (unsigned uLetter = 0; uLetter < 4; ++uLetter)\r
168                         {\r
169                         SortOrderA[i][uLetter] = PA[i].m_uSortOrder[uLetter];\r
170                         FreqsA[i][uLetter] = PA[i].m_fcCounts[uLetter];\r
171                         }\r
172                 }\r
173 \r
174         for (unsigned j = 0; j < uLengthB; ++j)\r
175                 {\r
176                 GapOpenB[j] = PB[j].m_scoreGapOpen;\r
177                 GapCloseB[j] = PB[j].m_scoreGapClose;\r
178                 }\r
179 \r
180         for (unsigned uLetter = 0; uLetter < 4; ++uLetter)\r
181                 {\r
182                 for (unsigned j = 0; j < uLengthB; ++j)\r
183                         ScoreMxB[uLetter][j] = PB[j].m_AAScores[uLetter];\r
184                 }\r
185 \r
186         for (unsigned i = 0; i < uPrefixCountA; ++i)\r
187                 memset(TraceBack[i], 0, uPrefixCountB*sizeof(int));\r
188 \r
189 // Special case for i=0\r
190         unsigned **ptrSortOrderA = SortOrderA;\r
191         FCOUNT **ptrFreqsA = FreqsA;\r
192         assert(ptrSortOrderA == &(SortOrderA[0]));\r
193         assert(ptrFreqsA == &(FreqsA[0]));\r
194         TraceBack[0][0] = 0;\r
195 \r
196         SCORE scoreSum = 0;\r
197         unsigned *ptrSortOrderAi = SortOrderA[0];\r
198         const unsigned *ptrSortOrderAEnd = ptrSortOrderAi + 4;\r
199         FCOUNT *ptrFreqsAi = FreqsA[0];\r
200         for (; ptrSortOrderAi != ptrSortOrderAEnd; ++ptrSortOrderAi)\r
201                 {\r
202                 const unsigned uLetter = *ptrSortOrderAi;\r
203                 const FCOUNT fcLetter = ptrFreqsAi[uLetter];\r
204                 if (0 == fcLetter)\r
205                         break;\r
206                 scoreSum += fcLetter*ScoreMxB[uLetter][0];\r
207                 }\r
208         MPrev[0] = scoreSum - g_scoreCenter;\r
209 \r
210 // D(0,0) is -infinity (requires I->D).\r
211         DPrev[0] = MINUS_INFINITY;\r
212 \r
213         for (unsigned j = 1; j < uLengthB; ++j)\r
214                 {\r
215         // Only way to get M(0, j) looks like this:\r
216         //              A       ----X\r
217         //              B       XXXXX\r
218         //                      0   j\r
219         // So gap-open at j=0, gap-close at j-1.\r
220                 SCORE scoreSum = 0;\r
221                 unsigned *ptrSortOrderAi = SortOrderA[0];\r
222                 const unsigned *ptrSortOrderAEnd = ptrSortOrderAi + 4;\r
223                 FCOUNT *ptrFreqsAi = FreqsA[0];\r
224                 for (; ptrSortOrderAi != ptrSortOrderAEnd; ++ptrSortOrderAi)\r
225                         {\r
226                         const unsigned uLetter = *ptrSortOrderAi;\r
227                         const FCOUNT fcLetter = ptrFreqsAi[uLetter];\r
228                         if (0 == fcLetter)\r
229                                 break;\r
230                         scoreSum += fcLetter*ScoreMxB[uLetter][j];\r
231                         }\r
232                 MPrev[j] = scoreSum - g_scoreCenter + GapOpenB[0] + GapCloseB[j-1];\r
233                 TraceBack[0][j] = -(int) j;\r
234 \r
235         // Assume no D->I transitions, then can't be a delete if only\r
236         // one letter from A.\r
237                 DPrev[j] = MINUS_INFINITY;\r
238                 }\r
239 \r
240         SCORE IPrev_j_1;\r
241         for (unsigned i = 1; i < uLengthA; ++i)\r
242                 {\r
243                 ++ptrSortOrderA;\r
244                 ++ptrFreqsA;\r
245                 assert(ptrSortOrderA == &(SortOrderA[i]));\r
246                 assert(ptrFreqsA == &(FreqsA[i]));\r
247 \r
248                 SCORE *ptrMCurr_j = MCurr;\r
249                 memset(ptrMCurr_j, 0, uLengthB*sizeof(SCORE));\r
250                 const FCOUNT *FreqsAi = *ptrFreqsA;\r
251 \r
252                 const unsigned *SortOrderAi = *ptrSortOrderA;\r
253                 const unsigned *ptrSortOrderAiEnd = SortOrderAi + 4;\r
254                 const SCORE *ptrMCurrMax = MCurr + uLengthB;\r
255                 for (const unsigned *ptrSortOrderAi = SortOrderAi;\r
256                   ptrSortOrderAi != ptrSortOrderAiEnd;\r
257                   ++ptrSortOrderAi)\r
258                         {\r
259                         const unsigned uLetter = *ptrSortOrderAi;\r
260                         SCORE *NSBR_Letter = ScoreMxB[uLetter];\r
261                         const FCOUNT fcLetter = FreqsAi[uLetter];\r
262                         if (0 == fcLetter)\r
263                                 break;\r
264                         SCORE *ptrNSBR = NSBR_Letter;\r
265                         for (SCORE *ptrMCurr = MCurr; ptrMCurr != ptrMCurrMax; ++ptrMCurr)\r
266                                 *ptrMCurr += fcLetter*(*ptrNSBR++);\r
267                         }\r
268 \r
269                 for (unsigned j = 0; j < uLengthB; ++j)\r
270                         MCurr[j] -= g_scoreCenter;\r
271 \r
272                 ptrMCurr_j = MCurr;\r
273                 unsigned *ptrDeletePos = uDeletePos;\r
274 \r
275         // Special case for j=0\r
276         // Only way to get M(i, 0) looks like this:\r
277         //                      0   i\r
278         //              A       XXXXX\r
279         //              B       ----X\r
280         // So gap-open at i=0, gap-close at i-1.\r
281                 assert(ptrMCurr_j == &(MCurr[0]));\r
282                 *ptrMCurr_j += GapOpenA[0] + GapCloseA[i-1];\r
283 \r
284                 ++ptrMCurr_j;\r
285 \r
286                 int *ptrTraceBack_ij = TraceBack[i];\r
287                 *ptrTraceBack_ij++ = (int) i;\r
288 \r
289                 SCORE *ptrMPrev_j = MPrev;\r
290                 SCORE *ptrDPrev = DPrev;\r
291                 SCORE d = *ptrDPrev;\r
292                 SCORE DNew = *ptrMPrev_j + GapOpenA[i];\r
293                 if (DNew > d)\r
294                         {\r
295                         d = DNew;\r
296                         *ptrDeletePos = i;\r
297                         }\r
298 \r
299                 SCORE *ptrDCurr = DCurr;\r
300 \r
301                 assert(ptrDCurr == &(DCurr[0]));\r
302                 *ptrDCurr = d;\r
303 \r
304         // Can't have an insert if no letters from B\r
305                 IPrev_j_1 = MINUS_INFINITY;\r
306 \r
307                 unsigned uInsertPos;\r
308                 const SCORE scoreGapOpenAi = GapOpenA[i];\r
309                 const SCORE scoreGapCloseAi_1 = GapCloseA[i-1];\r
310 \r
311                 for (unsigned j = 1; j < uLengthB; ++j)\r
312                         {\r
313                 // Here, MPrev_j is preserved from previous\r
314                 // iteration so with current i,j is M[i-1][j-1]\r
315                         SCORE MPrev_j = *ptrMPrev_j;\r
316                         SCORE INew = MPrev_j + GapOpenB[j];\r
317                         if (INew > IPrev_j_1)\r
318                                 {\r
319                                 IPrev_j_1 = INew;\r
320                                 uInsertPos = j;\r
321                                 }\r
322 \r
323                         SCORE scoreMax = MPrev_j;\r
324 \r
325                         assert(ptrDPrev == &(DPrev[j-1]));\r
326                         SCORE scoreD = *ptrDPrev++ + scoreGapCloseAi_1;\r
327                         if (scoreD > scoreMax)\r
328                                 {\r
329                                 scoreMax = scoreD;\r
330                                 assert(ptrDeletePos == &(uDeletePos[j-1]));\r
331                                 *ptrTraceBack_ij = (int) i - (int) *ptrDeletePos;\r
332                                 assert(*ptrTraceBack_ij > 0);\r
333                                 }\r
334                         ++ptrDeletePos;\r
335 \r
336                         SCORE scoreI = IPrev_j_1 + GapCloseB[j-1];\r
337                         if (scoreI > scoreMax)\r
338                                 {\r
339                                 scoreMax = scoreI;\r
340                                 *ptrTraceBack_ij = (int) uInsertPos - (int) j;\r
341                                 assert(*ptrTraceBack_ij < 0);\r
342                                 }\r
343 \r
344                         assert(ptrSortOrderA == &(SortOrderA[i]));\r
345                         assert(ptrFreqsA == &(FreqsA[i]));\r
346 \r
347                         *ptrMCurr_j += scoreMax;\r
348                         assert(ptrMCurr_j == &(MCurr[j]));\r
349                         ++ptrMCurr_j;\r
350 \r
351                         MPrev_j = *(++ptrMPrev_j);\r
352                         assert(ptrDPrev == &(DPrev[j]));\r
353                         SCORE d = *ptrDPrev;\r
354                         SCORE DNew = MPrev_j + scoreGapOpenAi;\r
355                         if (DNew > d)\r
356                                 {\r
357                                 d = DNew;\r
358                                 assert(ptrDeletePos == &uDeletePos[j]);\r
359                                 *ptrDeletePos = i;\r
360                                 }\r
361                         assert(ptrDCurr + 1 == &(DCurr[j]));\r
362                         *(++ptrDCurr) = d;\r
363 \r
364                         ++ptrTraceBack_ij;\r
365                         }\r
366 \r
367                 Rotate(MPrev, MCurr, MWork);\r
368                 Rotate(DPrev, DCurr, DWork);\r
369                 }\r
370 \r
371 // Special case for i=uLengthA\r
372         SCORE IPrev = MINUS_INFINITY;\r
373 \r
374         unsigned uInsertPos;\r
375 \r
376         for (unsigned j = 1; j < uLengthB; ++j)\r
377                 {\r
378                 SCORE INew = MPrev[j-1] + GapOpenB[j];\r
379                 if (INew > IPrev)\r
380                         {\r
381                         uInsertPos = j;\r
382                         IPrev = INew;\r
383                         }\r
384                 }\r
385 \r
386 // Special case for i=uLengthA, j=uLengthB\r
387         SCORE scoreMax = MPrev[uLengthB-1];\r
388         int iTraceBack = 0;\r
389 \r
390         SCORE scoreD = DPrev[uLengthB-1] + GapCloseA[uLengthA-1];\r
391         if (scoreD > scoreMax)\r
392                 {\r
393                 scoreMax = scoreD;\r
394                 iTraceBack = (int) uLengthA - (int) uDeletePos[uLengthB-1];\r
395                 }\r
396 \r
397         SCORE scoreI = IPrev + GapCloseB[uLengthB-1];\r
398         if (scoreI > scoreMax)\r
399                 {\r
400                 scoreMax = scoreI;\r
401                 iTraceBack = (int) uInsertPos - (int) uLengthB;\r
402                 }\r
403 \r
404         TraceBack[uLengthA][uLengthB] = iTraceBack;\r
405 \r
406         TraceBackToPath(TraceBack, uLengthA, uLengthB, Path);\r
407 \r
408         return scoreMax;\r
409         }\r