Next version of JABA
[jabaws.git] / binaries / src / muscle / glbalndimer.cpp
1 #include "muscle.h"\r
2 #include <math.h>\r
3 #include <stdio.h>      // for sprintf\r
4 #include "pwpath.h"\r
5 #include "profile.h"\r
6 #include "gapscoredimer.h"\r
7 \r
8 #define TRACE   0\r
9 \r
10 static SCORE TraceBackDimer(  const SCORE *DPM_, const SCORE *DPD_, const SCORE *DPI_,\r
11   const char *TBM_, const char *TBD_, const char *TBI_,\r
12   unsigned uLengthA, unsigned uLengthB, PWPath &Path);\r
13 \r
14 static const char *LocalScoreToStr(SCORE s)\r
15         {\r
16         static char str[16];\r
17         if (MINUS_INFINITY == s)\r
18                 return "     *";\r
19         sprintf(str, "%6.3g", s);\r
20         return str;\r
21         }\r
22 \r
23 #if     TRACE\r
24 static void ListDP(const SCORE *DPM_, const ProfPos *PA, const ProfPos *PB,\r
25   unsigned uPrefixCountA, unsigned uPrefixCountB)\r
26         {\r
27         Log("        ");\r
28         for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)\r
29                 {\r
30                 char c = ' ';\r
31                 if (uPrefixLengthB > 0)\r
32                         c = ConsensusChar(PB[uPrefixLengthB - 1]);\r
33                 Log(" %4u:%c", uPrefixLengthB, c);\r
34                 }\r
35         Log("\n");\r
36         for (unsigned uPrefixLengthA = 0; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)\r
37                 {\r
38                 char c = ' ';\r
39                 if (uPrefixLengthA > 0)\r
40                         c = ConsensusChar(PA[uPrefixLengthA - 1]);\r
41                 Log("%4u:%c  ", uPrefixLengthA, c);\r
42                 for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)\r
43                         Log(" %s", LocalScoreToStr(DPM(uPrefixLengthA, uPrefixLengthB)));\r
44                 Log("\n");\r
45                 }\r
46         }\r
47 \r
48 static void ListTB(const char *TBM_, const ProfPos *PA, const ProfPos *PB,\r
49   unsigned uPrefixCountA, unsigned uPrefixCountB)\r
50         {\r
51         Log("        ");\r
52         for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)\r
53                 Log("%2d", uPrefixLengthB);\r
54         Log("\n");\r
55         Log("        ");\r
56         for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)\r
57                 {\r
58                 char c = ' ';\r
59                 if (uPrefixLengthB > 0)\r
60                         c = ConsensusChar(PB[uPrefixLengthB - 1]);\r
61                 Log(" %c", c);\r
62                 }\r
63         Log("\n");\r
64         for (unsigned uPrefixLengthA = 0; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)\r
65                 {\r
66                 char c = ' ';\r
67                 if (uPrefixLengthA > 0)\r
68                         c = ConsensusChar(PA[uPrefixLengthA - 1]);\r
69                 Log("%4u:%c  ", uPrefixLengthA, c);\r
70                 for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)\r
71                         Log(" %c", TBM(uPrefixLengthA, uPrefixLengthB));\r
72                 Log("\n");\r
73                 }\r
74         }\r
75 #endif // TRACE\r
76 \r
77 static ProfPos PPTerm;\r
78 static bool InitializePPTerm()\r
79         {\r
80         PPTerm.m_bAllGaps = false;\r
81         PPTerm.m_LL = 1;\r
82         PPTerm.m_LG = 0;\r
83         PPTerm.m_GL = 0;\r
84         PPTerm.m_GG = 0;\r
85         PPTerm.m_fOcc = 1;\r
86         return true;\r
87         }\r
88 static bool PPTermInitialized = InitializePPTerm();\r
89 \r
90 static SCORE ScoreProfPosDimerLE(const ProfPos &PPA, const ProfPos &PPB)\r
91         {\r
92         SCORE Score = 0;\r
93         for (unsigned n = 0; n < 20; ++n)\r
94                 {\r
95                 const unsigned uLetter = PPA.m_uSortOrder[n];\r
96                 const FCOUNT fcLetter = PPA.m_fcCounts[uLetter];\r
97                 if (0 == fcLetter)\r
98                         break;\r
99                 Score += fcLetter*PPB.m_AAScores[uLetter];\r
100                 }\r
101         if (0 == Score)\r
102                 return -2.5;\r
103         SCORE logScore = logf(Score);\r
104         return (SCORE) (logScore*(PPA.m_fOcc * PPB.m_fOcc));\r
105         }\r
106 \r
107 static SCORE ScoreProfPosDimerPSP(const ProfPos &PPA, const ProfPos &PPB)\r
108         {\r
109         SCORE Score = 0;\r
110         for (unsigned n = 0; n < 20; ++n)\r
111                 {\r
112                 const unsigned uLetter = PPA.m_uSortOrder[n];\r
113                 const FCOUNT fcLetter = PPA.m_fcCounts[uLetter];\r
114                 if (0 == fcLetter)\r
115                         break;\r
116                 Score += fcLetter*PPB.m_AAScores[uLetter];\r
117                 }\r
118         return Score;\r
119         }\r
120 \r
121 static SCORE ScoreProfPosDimer(const ProfPos &PPA, const ProfPos &PPB)\r
122         {\r
123         switch (g_PPScore)\r
124                 {\r
125         case PPSCORE_LE:\r
126                 return ScoreProfPosDimerLE(PPA, PPB);\r
127 \r
128         case PPSCORE_SP:\r
129         case PPSCORE_SV:\r
130                 return ScoreProfPosDimerPSP(PPA, PPB);\r
131                 }\r
132         Quit("Invalid g_PPScore");\r
133         return 0;\r
134         }\r
135 \r
136 // Global alignment dynamic programming\r
137 // This variant optimizes the profile-profile SP score under the\r
138 // dimer approximation.\r
139 SCORE GlobalAlignDimer(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,\r
140   unsigned uLengthB, PWPath &Path)\r
141         {\r
142         assert(uLengthB > 0 && uLengthA > 0);\r
143 \r
144         const unsigned uPrefixCountA = uLengthA + 1;\r
145         const unsigned uPrefixCountB = uLengthB + 1;\r
146 \r
147 // Allocate DP matrices\r
148         const size_t LM = uPrefixCountA*uPrefixCountB;\r
149         SCORE *DPM_ = new SCORE[LM];\r
150         SCORE *DPD_ = new SCORE[LM];\r
151         SCORE *DPI_ = new SCORE[LM];\r
152 \r
153         char *TBM_ = new char[LM];\r
154         char *TBD_ = new char[LM];\r
155         char *TBI_ = new char[LM];\r
156 \r
157         DPM(0, 0) = 0;\r
158         DPD(0, 0) = MINUS_INFINITY;\r
159         DPI(0, 0) = MINUS_INFINITY;\r
160 \r
161         TBM(0, 0) = 'S';\r
162         TBD(0, 0) = '?';\r
163         TBI(0, 0) = '?';\r
164 \r
165         DPM(1, 0) = MINUS_INFINITY;\r
166         DPD(1, 0) = GapScoreMD(PA[0], PPTerm);\r
167         DPI(1, 0) = MINUS_INFINITY;\r
168 \r
169         TBM(1, 0) = '?';\r
170         TBD(1, 0) = 'S';\r
171         TBI(1, 0) = '?';\r
172 \r
173         DPM(0, 1) = MINUS_INFINITY;\r
174         DPD(0, 1) = MINUS_INFINITY;\r
175         DPI(0, 1) = GapScoreMI(PPTerm, PB[0]);\r
176 \r
177         TBM(0, 1) = '?';\r
178         TBD(0, 1) = '?';\r
179         TBI(0, 1) = 'S';\r
180 \r
181 // Empty prefix of B is special case\r
182         for (unsigned uPrefixLengthA = 2; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)\r
183                 {\r
184         // M=LetterA+LetterB, impossible with empty prefix\r
185                 DPM(uPrefixLengthA, 0) = MINUS_INFINITY;\r
186                 TBM(uPrefixLengthA, 0) = '?';\r
187 \r
188         // D=LetterA+GapB\r
189                 DPD(uPrefixLengthA, 0) = DPD(uPrefixLengthA - 1, 0) +\r
190                   GapScoreDD(PA[uPrefixLengthA - 1], PPTerm);\r
191                 TBD(uPrefixLengthA, 0) = 'D';\r
192 \r
193         // I=GapA+LetterB, impossible with empty prefix\r
194                 DPI(uPrefixLengthA, 0) = MINUS_INFINITY;\r
195                 TBI(uPrefixLengthA, 0) = '?';\r
196                 }\r
197 \r
198 // Empty prefix of A is special case\r
199         for (unsigned uPrefixLengthB = 2; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)\r
200                 {\r
201         // M=LetterA+LetterB, impossible with empty prefix\r
202                 DPM(0, uPrefixLengthB) = MINUS_INFINITY;\r
203                 TBM(0, uPrefixLengthB) = '?';\r
204 \r
205         // D=LetterA+GapB, impossible with empty prefix\r
206                 DPD(0, uPrefixLengthB) = MINUS_INFINITY;\r
207                 TBD(0, uPrefixLengthB) = '?';\r
208 \r
209         // I=GapA+LetterB\r
210                 DPI(0, uPrefixLengthB) = DPI(0, uPrefixLengthB - 1) +\r
211                   GapScoreII(PPTerm, PB[uPrefixLengthB - 1]);\r
212                 TBI(0, uPrefixLengthB) = 'I';\r
213                 }\r
214 \r
215 // ============\r
216 // Main DP loop\r
217 // ============\r
218         for (unsigned uPrefixLengthB = 1; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)\r
219                 {\r
220                 const ProfPos &PPB = PB[uPrefixLengthB - 1];\r
221                 for (unsigned uPrefixLengthA = 1; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)\r
222                         {\r
223                         const ProfPos &PPA = PA[uPrefixLengthA - 1];\r
224                         {\r
225                 // Match M=LetterA+LetterB\r
226                         SCORE scoreLL = ScoreProfPosDimer(PPA, PPB);\r
227 \r
228                         SCORE scoreMM = DPM(uPrefixLengthA-1, uPrefixLengthB-1) + GapScoreMM(PPA, PPB);\r
229                         SCORE scoreDM = DPD(uPrefixLengthA-1, uPrefixLengthB-1) + GapScoreDM(PPA, PPB);\r
230                         SCORE scoreIM = DPI(uPrefixLengthA-1, uPrefixLengthB-1) + GapScoreIM(PPA, PPB);\r
231 \r
232                         SCORE scoreBest = scoreMM;\r
233                         char c = 'M';\r
234                         if (scoreDM > scoreBest)\r
235                                 {\r
236                                 scoreBest = scoreDM;\r
237                                 c = 'D';\r
238                                 }\r
239                         if (scoreIM > scoreBest)\r
240                                 {\r
241                                 scoreBest = scoreIM;\r
242                                 c = 'I';\r
243                                 }\r
244 \r
245                         DPM(uPrefixLengthA, uPrefixLengthB) = scoreBest + scoreLL;\r
246                         TBM(uPrefixLengthA, uPrefixLengthB) = c;\r
247                         }\r
248                         {\r
249                 // Delete D=LetterA+GapB\r
250                         SCORE scoreMD = DPM(uPrefixLengthA-1, uPrefixLengthB) + GapScoreMD(PPA, PPB);\r
251                         SCORE scoreDD = DPD(uPrefixLengthA-1, uPrefixLengthB) + GapScoreDD(PPA, PPB);\r
252                         SCORE scoreID = DPI(uPrefixLengthA-1, uPrefixLengthB) + GapScoreID(PPA, PPB);\r
253 \r
254                         SCORE scoreBest = scoreMD;\r
255                         char c = 'M';\r
256                         if (scoreDD > scoreBest)\r
257                                 {\r
258                                 scoreBest = scoreDD;\r
259                                 c = 'D';\r
260                                 }\r
261                         if (scoreID > scoreBest)\r
262                                 {\r
263                                 scoreBest = scoreID;\r
264                                 c = 'I';\r
265                                 }\r
266 \r
267                         DPD(uPrefixLengthA, uPrefixLengthB) = scoreBest;\r
268                         TBD(uPrefixLengthA, uPrefixLengthB) = c;\r
269                         }\r
270                         {\r
271                 // Insert I=GapA+LetterB\r
272                         SCORE scoreMI = DPM(uPrefixLengthA, uPrefixLengthB-1) + GapScoreMI(PPA, PPB);\r
273                         SCORE scoreDI = DPD(uPrefixLengthA, uPrefixLengthB-1) + GapScoreDI(PPA, PPB);\r
274                         SCORE scoreII = DPI(uPrefixLengthA, uPrefixLengthB-1) + GapScoreII(PPA, PPB);\r
275 \r
276                         SCORE scoreBest = scoreMI;\r
277                         char c = 'M';\r
278                         if (scoreDI > scoreBest)\r
279                                 {\r
280                                 scoreBest = scoreDI;\r
281                                 c = 'D';\r
282                                 }\r
283                         if (scoreII > scoreBest)\r
284                                 {\r
285                                 scoreBest = scoreII;\r
286                                 c = 'I';\r
287                                 }\r
288 \r
289                         DPI(uPrefixLengthA, uPrefixLengthB) = scoreBest;\r
290                         TBI(uPrefixLengthA, uPrefixLengthB) = c;\r
291                         }\r
292                         }\r
293                 }\r
294 \r
295 #if TRACE\r
296         Log("DPM:\n");\r
297         ListDP(DPM_, PA, PB, uPrefixCountA, uPrefixCountB);\r
298         Log("DPD:\n");\r
299         ListDP(DPD_, PA, PB, uPrefixCountA, uPrefixCountB);\r
300         Log("DPI:\n");\r
301         ListDP(DPI_, PA, PB, uPrefixCountA, uPrefixCountB);\r
302         Log("TBM:\n");\r
303         ListTB(TBM_, PA, PB, uPrefixCountA, uPrefixCountB);\r
304         Log("TBD:\n");\r
305         ListTB(TBD_, PA, PB, uPrefixCountA, uPrefixCountB);\r
306         Log("TBI:\n");\r
307         ListTB(TBI_, PA, PB, uPrefixCountA, uPrefixCountB);\r
308 #endif\r
309 \r
310         SCORE Score = TraceBackDimer(DPM_, DPD_, DPI_, TBM_, TBD_, TBI_,\r
311           uLengthA, uLengthB, Path);\r
312 \r
313 #if     TRACE\r
314         Log("GlobalAlignDimer score = %.3g\n", Score);\r
315 #endif\r
316 \r
317         delete[] DPM_;\r
318         delete[] DPD_;\r
319         delete[] DPI_;\r
320 \r
321         delete[] TBM_;\r
322         delete[] TBD_;\r
323         delete[] TBI_;\r
324 \r
325         return Score;\r
326         }\r
327 \r
328 static SCORE TraceBackDimer(  const SCORE *DPM_, const SCORE *DPD_, const SCORE *DPI_,\r
329   const char *TBM_, const char *TBD_, const char *TBI_,\r
330   unsigned uLengthA, unsigned uLengthB, PWPath &Path)\r
331         {\r
332         const unsigned uPrefixCountA = uLengthA + 1;\r
333 \r
334         unsigned uPrefixLengthA = uLengthA;\r
335         unsigned uPrefixLengthB = uLengthB;\r
336 \r
337         char cEdge = 'M';\r
338         SCORE scoreMax = DPM(uLengthA, uLengthB);\r
339         if (DPD(uLengthA, uLengthB) > scoreMax)\r
340                 {\r
341                 scoreMax = DPD(uLengthA, uLengthB);\r
342                 cEdge = 'D';\r
343                 }\r
344         if (DPI(uLengthA, uLengthB) > scoreMax)\r
345                 {\r
346                 scoreMax = DPI(uLengthA, uLengthB);\r
347                 cEdge = 'I';\r
348                 }\r
349 \r
350         for (;;)\r
351                 {\r
352                 if (0 == uPrefixLengthA && 0 == uPrefixLengthB)\r
353                         break;\r
354 \r
355                 PWEdge Edge;\r
356                 Edge.cType = cEdge;\r
357                 Edge.uPrefixLengthA = uPrefixLengthA;\r
358                 Edge.uPrefixLengthB = uPrefixLengthB;\r
359                 Path.PrependEdge(Edge);\r
360 \r
361 #if TRACE\r
362                 Log("PLA=%u PLB=%u Edge=%c\n", uPrefixLengthA, uPrefixLengthB, cEdge);\r
363 #endif\r
364                 switch (cEdge)\r
365                         {\r
366                 case 'M':\r
367                         assert(uPrefixLengthA > 0 && uPrefixLengthB > 0);\r
368                         cEdge = TBM(uPrefixLengthA, uPrefixLengthB);\r
369                         --uPrefixLengthA;\r
370                         --uPrefixLengthB;\r
371                         break;\r
372                 case 'D':\r
373                         assert(uPrefixLengthA > 0);\r
374                         cEdge = TBD(uPrefixLengthA, uPrefixLengthB);\r
375                         --uPrefixLengthA;\r
376                         break;\r
377                 case 'I':\r
378                         assert(uPrefixLengthB > 0);\r
379                         cEdge = TBI(uPrefixLengthA, uPrefixLengthB);\r
380                         --uPrefixLengthB;\r
381                         break;\r
382                 default:\r
383                         Quit("Invalid edge PLA=%u PLB=%u %c", uPrefixLengthA, uPrefixLengthB, cEdge);\r
384                         }\r
385                 }\r
386 #if     TRACE\r
387         Path.LogMe();\r
388 #endif\r
389         return scoreMax;\r
390         }\r