Next version of JABA
[jabaws.git] / binaries / src / muscle / glbalignsp.cpp
1 #include "muscle.h"\r
2 #include "profile.h"\r
3 #include "pwpath.h"\r
4 \r
5 struct DP_MEMORY\r
6         {\r
7         unsigned uLength;\r
8         SCORE *GapOpenA;\r
9         SCORE *GapOpenB;\r
10         SCORE *GapCloseA;\r
11         SCORE *GapCloseB;\r
12         SCORE *MPrev;\r
13         SCORE *MCurr;\r
14         SCORE *MWork;\r
15         SCORE *DPrev;\r
16         SCORE *DCurr;\r
17         SCORE *DWork;\r
18         SCORE **ScoreMxB;\r
19         unsigned **SortOrderA;\r
20         unsigned *uDeletePos;\r
21         FCOUNT **FreqsA;\r
22         int **TraceBack;\r
23         };\r
24 \r
25 static struct DP_MEMORY DPM;\r
26 \r
27 static void AllocDPMem(unsigned uLengthA, unsigned uLengthB)\r
28         {\r
29 // Max prefix length\r
30         unsigned uLength = (uLengthA > uLengthB ? uLengthA : uLengthB) + 1;\r
31         if (uLength < DPM.uLength)\r
32                 return;\r
33 \r
34 // Add 256 to allow for future expansion and\r
35 // round up to next multiple of 32.\r
36         uLength += 256;\r
37         uLength += 32 - uLength%32;\r
38 \r
39         const unsigned uOldLength = DPM.uLength;\r
40         if (uOldLength > 0)\r
41                 {\r
42                 for (unsigned i = 0; i < uOldLength; ++i)\r
43                         {\r
44                         delete[] DPM.TraceBack[i];\r
45                         delete[] DPM.FreqsA[i];\r
46                         delete[] DPM.SortOrderA[i];\r
47                         }\r
48                 for (unsigned n = 0; n < 20; ++n)\r
49                         delete[] DPM.ScoreMxB[n];\r
50 \r
51                 delete[] DPM.MPrev;\r
52                 delete[] DPM.MCurr;\r
53                 delete[] DPM.MWork;\r
54                 delete[] DPM.DPrev;\r
55                 delete[] DPM.DCurr;\r
56                 delete[] DPM.DWork;\r
57                 delete[] DPM.uDeletePos;\r
58                 delete[] DPM.GapOpenA;\r
59                 delete[] DPM.GapOpenB;\r
60                 delete[] DPM.GapCloseA;\r
61                 delete[] DPM.GapCloseB;\r
62                 delete[] DPM.SortOrderA;\r
63                 delete[] DPM.FreqsA;\r
64                 delete[] DPM.ScoreMxB;\r
65                 delete[] DPM.TraceBack;\r
66                 }\r
67 \r
68         DPM.uLength = uLength;\r
69 \r
70         DPM.GapOpenA = new SCORE[uLength];\r
71         DPM.GapOpenB = new SCORE[uLength];\r
72         DPM.GapCloseA = new SCORE[uLength];\r
73         DPM.GapCloseB = new SCORE[uLength];\r
74 \r
75         DPM.SortOrderA = new unsigned*[uLength];\r
76         DPM.FreqsA = new FCOUNT*[uLength];\r
77         DPM.ScoreMxB = new SCORE*[20];\r
78         DPM.MPrev = new SCORE[uLength];\r
79         DPM.MCurr = new SCORE[uLength];\r
80         DPM.MWork = new SCORE[uLength];\r
81 \r
82         DPM.DPrev = new SCORE[uLength];\r
83         DPM.DCurr = new SCORE[uLength];\r
84         DPM.DWork = new SCORE[uLength];\r
85         DPM.uDeletePos = new unsigned[uLength];\r
86 \r
87         DPM.TraceBack = new int*[uLength];\r
88 \r
89         for (unsigned uLetter = 0; uLetter < 20; ++uLetter)\r
90                 DPM.ScoreMxB[uLetter] = new SCORE[uLength];\r
91 \r
92         for (unsigned i = 0; i < uLength; ++i)\r
93                 {\r
94                 DPM.SortOrderA[i] = new unsigned[20];\r
95                 DPM.FreqsA[i] = new FCOUNT[20];\r
96                 DPM.TraceBack[i] = new int[uLength];\r
97                 }\r
98         }\r
99 \r
100 SCORE GlobalAlignSP(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,\r
101   unsigned uLengthB, PWPath &Path)\r
102         {\r
103         const unsigned uPrefixCountA = uLengthA + 1;\r
104         const unsigned uPrefixCountB = uLengthB + 1;\r
105 \r
106         AllocDPMem(uLengthA, uLengthB);\r
107 \r
108         SCORE *GapOpenA = DPM.GapOpenA;\r
109         SCORE *GapOpenB = DPM.GapOpenB;\r
110         SCORE *GapCloseA = DPM.GapCloseA;\r
111         SCORE *GapCloseB = DPM.GapCloseB;\r
112 \r
113         unsigned **SortOrderA = DPM.SortOrderA;\r
114         FCOUNT **FreqsA = DPM.FreqsA;\r
115         SCORE **ScoreMxB = DPM.ScoreMxB;\r
116         SCORE *MPrev = DPM.MPrev;\r
117         SCORE *MCurr = DPM.MCurr;\r
118         SCORE *MWork = DPM.MWork;\r
119 \r
120         SCORE *DPrev = DPM.DPrev;\r
121         SCORE *DCurr = DPM.DCurr;\r
122         SCORE *DWork = DPM.DWork;\r
123         unsigned *uDeletePos = DPM.uDeletePos;\r
124 \r
125         int **TraceBack = DPM.TraceBack;\r
126 \r
127         for (unsigned i = 0; i < uLengthA; ++i)\r
128                 {\r
129                 GapOpenA[i] = PA[i].m_scoreGapOpen;\r
130                 GapCloseA[i] = PA[i].m_scoreGapClose;\r
131 \r
132                 for (unsigned uLetter = 0; uLetter < 20; ++uLetter)\r
133                         {\r
134                         SortOrderA[i][uLetter] = PA[i].m_uSortOrder[uLetter];\r
135                         FreqsA[i][uLetter] = PA[i].m_fcCounts[uLetter];\r
136                         }\r
137                 }\r
138 \r
139         for (unsigned j = 0; j < uLengthB; ++j)\r
140                 {\r
141                 GapOpenB[j] = PB[j].m_scoreGapOpen;\r
142                 GapCloseB[j] = PB[j].m_scoreGapClose;\r
143                 }\r
144 \r
145         for (unsigned uLetter = 0; uLetter < 20; ++uLetter)\r
146                 {\r
147                 for (unsigned j = 0; j < uLengthB; ++j)\r
148                         ScoreMxB[uLetter][j] = PB[j].m_AAScores[uLetter];\r
149                 }\r
150 \r
151         for (unsigned i = 0; i < uPrefixCountA; ++i)\r
152                 memset(TraceBack[i], 0, uPrefixCountB*sizeof(int));\r
153 \r
154 // Special case for i=0\r
155         unsigned **ptrSortOrderA = SortOrderA;\r
156         FCOUNT **ptrFreqsA = FreqsA;\r
157         assert(ptrSortOrderA == &(SortOrderA[0]));\r
158         assert(ptrFreqsA == &(FreqsA[0]));\r
159         TraceBack[0][0] = 0;\r
160 \r
161         SCORE scoreSum = 0;\r
162         unsigned *ptrSortOrderAi = SortOrderA[0];\r
163         const unsigned *ptrSortOrderAEnd = ptrSortOrderAi + 20;\r
164         FCOUNT *ptrFreqsAi = FreqsA[0];\r
165         for (; ptrSortOrderAi != ptrSortOrderAEnd; ++ptrSortOrderAi)\r
166                 {\r
167                 const unsigned uLetter = *ptrSortOrderAi;\r
168                 const FCOUNT fcLetter = ptrFreqsAi[uLetter];\r
169                 if (0 == fcLetter)\r
170                         break;\r
171                 scoreSum += fcLetter*ScoreMxB[uLetter][0];\r
172                 }\r
173         MPrev[0] = scoreSum - g_scoreCenter;\r
174 \r
175 // D(0,0) is -infinity (requires I->D).\r
176         DPrev[0] = MINUS_INFINITY;\r
177 \r
178         for (unsigned j = 1; j < uLengthB; ++j)\r
179                 {\r
180         // Only way to get M(0, j) looks like this:\r
181         //              A       ----X\r
182         //              B       XXXXX\r
183         //                      0   j\r
184         // So gap-open at j=0, gap-close at j-1.\r
185                 SCORE scoreSum = 0;\r
186                 unsigned *ptrSortOrderAi = SortOrderA[0];\r
187                 const unsigned *ptrSortOrderAEnd = ptrSortOrderAi + 20;\r
188                 FCOUNT *ptrFreqsAi = FreqsA[0];\r
189                 for (; ptrSortOrderAi != ptrSortOrderAEnd; ++ptrSortOrderAi)\r
190                         {\r
191                         const unsigned uLetter = *ptrSortOrderAi;\r
192                         const FCOUNT fcLetter = ptrFreqsAi[uLetter];\r
193                         if (0 == fcLetter)\r
194                                 break;\r
195                         scoreSum += fcLetter*ScoreMxB[uLetter][j];\r
196                         }\r
197                 MPrev[j] = scoreSum - g_scoreCenter + GapOpenB[0] + GapCloseB[j-1];\r
198                 TraceBack[0][j] = -(int) j;\r
199 \r
200         // Assume no D->I transitions, then can't be a delete if only\r
201         // one letter from A.\r
202                 DPrev[j] = MINUS_INFINITY;\r
203                 }\r
204 \r
205         SCORE IPrev_j_1;\r
206         for (unsigned i = 1; i < uLengthA; ++i)\r
207                 {\r
208                 ++ptrSortOrderA;\r
209                 ++ptrFreqsA;\r
210                 assert(ptrSortOrderA == &(SortOrderA[i]));\r
211                 assert(ptrFreqsA == &(FreqsA[i]));\r
212 \r
213                 SCORE *ptrMCurr_j = MCurr;\r
214                 memset(ptrMCurr_j, 0, uLengthB*sizeof(SCORE));\r
215                 const FCOUNT *FreqsAi = *ptrFreqsA;\r
216 \r
217                 const unsigned *SortOrderAi = *ptrSortOrderA;\r
218                 const unsigned *ptrSortOrderAiEnd = SortOrderAi + 20;\r
219                 const SCORE *ptrMCurrMax = MCurr + uLengthB;\r
220                 for (const unsigned *ptrSortOrderAi = SortOrderAi;\r
221                   ptrSortOrderAi != ptrSortOrderAiEnd;\r
222                   ++ptrSortOrderAi)\r
223                         {\r
224                         const unsigned uLetter = *ptrSortOrderAi;\r
225                         SCORE *NSBR_Letter = ScoreMxB[uLetter];\r
226                         const FCOUNT fcLetter = FreqsAi[uLetter];\r
227                         if (0 == fcLetter)\r
228                                 break;\r
229                         SCORE *ptrNSBR = NSBR_Letter;\r
230                         for (SCORE *ptrMCurr = MCurr; ptrMCurr != ptrMCurrMax; ++ptrMCurr)\r
231                                 *ptrMCurr += fcLetter*(*ptrNSBR++);\r
232                         }\r
233 \r
234                 for (unsigned j = 0; j < uLengthB; ++j)\r
235                         MCurr[j] -= g_scoreCenter;\r
236 \r
237                 ptrMCurr_j = MCurr;\r
238                 unsigned *ptrDeletePos = uDeletePos;\r
239 \r
240         // Special case for j=0\r
241         // Only way to get M(i, 0) looks like this:\r
242         //                      0   i\r
243         //              A       XXXXX\r
244         //              B       ----X\r
245         // So gap-open at i=0, gap-close at i-1.\r
246                 assert(ptrMCurr_j == &(MCurr[0]));\r
247                 *ptrMCurr_j += GapOpenA[0] + GapCloseA[i-1];\r
248 \r
249                 ++ptrMCurr_j;\r
250 \r
251                 int *ptrTraceBack_ij = TraceBack[i];\r
252                 *ptrTraceBack_ij++ = (int) i;\r
253 \r
254                 SCORE *ptrMPrev_j = MPrev;\r
255                 SCORE *ptrDPrev = DPrev;\r
256                 SCORE d = *ptrDPrev;\r
257                 SCORE DNew = *ptrMPrev_j + GapOpenA[i];\r
258                 if (DNew > d)\r
259                         {\r
260                         d = DNew;\r
261                         *ptrDeletePos = i;\r
262                         }\r
263 \r
264                 SCORE *ptrDCurr = DCurr;\r
265 \r
266                 assert(ptrDCurr == &(DCurr[0]));\r
267                 *ptrDCurr = d;\r
268 \r
269         // Can't have an insert if no letters from B\r
270                 IPrev_j_1 = MINUS_INFINITY;\r
271 \r
272                 unsigned uInsertPos;\r
273                 const SCORE scoreGapOpenAi = GapOpenA[i];\r
274                 const SCORE scoreGapCloseAi_1 = GapCloseA[i-1];\r
275 \r
276                 for (unsigned j = 1; j < uLengthB; ++j)\r
277                         {\r
278                 // Here, MPrev_j is preserved from previous\r
279                 // iteration so with current i,j is M[i-1][j-1]\r
280                         SCORE MPrev_j = *ptrMPrev_j;\r
281                         SCORE INew = MPrev_j + GapOpenB[j];\r
282                         if (INew > IPrev_j_1)\r
283                                 {\r
284                                 IPrev_j_1 = INew;\r
285                                 uInsertPos = j;\r
286                                 }\r
287 \r
288                         SCORE scoreMax = MPrev_j;\r
289 \r
290                         assert(ptrDPrev == &(DPrev[j-1]));\r
291                         SCORE scoreD = *ptrDPrev++ + scoreGapCloseAi_1;\r
292                         if (scoreD > scoreMax)\r
293                                 {\r
294                                 scoreMax = scoreD;\r
295                                 assert(ptrDeletePos == &(uDeletePos[j-1]));\r
296                                 *ptrTraceBack_ij = (int) i - (int) *ptrDeletePos;\r
297                                 assert(*ptrTraceBack_ij > 0);\r
298                                 }\r
299                         ++ptrDeletePos;\r
300 \r
301                         SCORE scoreI = IPrev_j_1 + GapCloseB[j-1];\r
302                         if (scoreI > scoreMax)\r
303                                 {\r
304                                 scoreMax = scoreI;\r
305                                 *ptrTraceBack_ij = (int) uInsertPos - (int) j;\r
306                                 assert(*ptrTraceBack_ij < 0);\r
307                                 }\r
308 \r
309                         assert(ptrSortOrderA == &(SortOrderA[i]));\r
310                         assert(ptrFreqsA == &(FreqsA[i]));\r
311 \r
312                         *ptrMCurr_j += scoreMax;\r
313                         assert(ptrMCurr_j == &(MCurr[j]));\r
314                         ++ptrMCurr_j;\r
315 \r
316                         MPrev_j = *(++ptrMPrev_j);\r
317                         assert(ptrDPrev == &(DPrev[j]));\r
318                         SCORE d = *ptrDPrev;\r
319                         SCORE DNew = MPrev_j + scoreGapOpenAi;\r
320                         if (DNew > d)\r
321                                 {\r
322                                 d = DNew;\r
323                                 assert(ptrDeletePos == &uDeletePos[j]);\r
324                                 *ptrDeletePos = i;\r
325                                 }\r
326                         assert(ptrDCurr + 1 == &(DCurr[j]));\r
327                         *(++ptrDCurr) = d;\r
328 \r
329                         ++ptrTraceBack_ij;\r
330                         }\r
331 \r
332                 Rotate(MPrev, MCurr, MWork);\r
333                 Rotate(DPrev, DCurr, DWork);\r
334                 }\r
335 \r
336 // Special case for i=uLengthA\r
337         SCORE IPrev = MINUS_INFINITY;\r
338 \r
339         unsigned uInsertPos;\r
340 \r
341         for (unsigned j = 1; j < uLengthB; ++j)\r
342                 {\r
343                 SCORE INew = MPrev[j-1] + GapOpenB[j];\r
344                 if (INew > IPrev)\r
345                         {\r
346                         uInsertPos = j;\r
347                         IPrev = INew;\r
348                         }\r
349                 }\r
350 \r
351 // Special case for i=uLengthA, j=uLengthB\r
352         SCORE scoreMax = MPrev[uLengthB-1];\r
353         int iTraceBack = 0;\r
354 \r
355         SCORE scoreD = DPrev[uLengthB-1] + GapCloseA[uLengthA-1];\r
356         if (scoreD > scoreMax)\r
357                 {\r
358                 scoreMax = scoreD;\r
359                 iTraceBack = (int) uLengthA - (int) uDeletePos[uLengthB-1];\r
360                 }\r
361 \r
362         SCORE scoreI = IPrev + GapCloseB[uLengthB-1];\r
363         if (scoreI > scoreMax)\r
364                 {\r
365                 scoreMax = scoreI;\r
366                 iTraceBack = (int) uInsertPos - (int) uLengthB;\r
367                 }\r
368 \r
369         TraceBack[uLengthA][uLengthB] = iTraceBack;\r
370 \r
371         TraceBackToPath(TraceBack, uLengthA, uLengthB, Path);\r
372 \r
373         return scoreMax;\r
374         }\r