Next version of JABA
[jabaws.git] / binaries / src / muscle / nwdasimple.cpp
1 #include "muscle.h"\r
2 #include <math.h>\r
3 #include "pwpath.h"\r
4 #include "profile.h"\r
5 #include <stdio.h>\r
6 \r
7 #define TRACE   0\r
8 \r
9 bool g_bKeepSimpleDP;\r
10 SCORE *g_DPM;\r
11 SCORE *g_DPD;\r
12 SCORE *g_DPE;\r
13 SCORE *g_DPI;\r
14 SCORE *g_DPJ;\r
15 char *g_TBM;\r
16 char *g_TBD;\r
17 char *g_TBE;\r
18 char *g_TBI;\r
19 char *g_TBJ;\r
20 \r
21 #if     DOUBLE_AFFINE\r
22 \r
23 static char XlatEdgeType(char c)\r
24         {\r
25         if ('E' == c)\r
26                 return 'D';\r
27         if ('J' == c)\r
28                 return 'I';\r
29         return c;\r
30         }\r
31 \r
32 static const char *LocalScoreToStr(SCORE s)\r
33         {\r
34         static char str[16];\r
35         if (s < -100000)\r
36                 return "     *";\r
37         sprintf(str, "%6.1f", s);\r
38         return str;\r
39         }\r
40 \r
41 static void ListTB(const char *TBM_, const ProfPos *PA, const ProfPos *PB,\r
42   unsigned uPrefixCountA, unsigned uPrefixCountB)\r
43         {\r
44         Log("        ");\r
45         for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)\r
46                 {\r
47                 char c = ' ';\r
48                 if (uPrefixLengthB > 0)\r
49                         c = ConsensusChar(PB[uPrefixLengthB - 1]);\r
50                 Log(" %4u:%c", uPrefixLengthB, c);\r
51                 }\r
52         Log("\n");\r
53         for (unsigned uPrefixLengthA = 0; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)\r
54                 {\r
55                 char c = ' ';\r
56                 if (uPrefixLengthA > 0)\r
57                         c = ConsensusChar(PA[uPrefixLengthA - 1]);\r
58                 Log("%4u:%c  ", uPrefixLengthA, c);\r
59                 for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)\r
60                         Log(" %6c", TBM(uPrefixLengthA, uPrefixLengthB));\r
61                 Log("\n");\r
62                 }\r
63         }\r
64 \r
65 static void ListDP(const SCORE *DPM_, const ProfPos *PA, const ProfPos *PB,\r
66   unsigned uPrefixCountA, unsigned uPrefixCountB)\r
67         {\r
68         Log("        ");\r
69         for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)\r
70                 {\r
71                 char c = ' ';\r
72                 if (uPrefixLengthB > 0)\r
73                         c = ConsensusChar(PB[uPrefixLengthB - 1]);\r
74                 Log(" %4u:%c", uPrefixLengthB, c);\r
75                 }\r
76         Log("\n");\r
77         for (unsigned uPrefixLengthA = 0; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)\r
78                 {\r
79                 char c = ' ';\r
80                 if (uPrefixLengthA > 0)\r
81                         c = ConsensusChar(PA[uPrefixLengthA - 1]);\r
82                 Log("%4u:%c  ", uPrefixLengthA, c);\r
83                 for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)\r
84                         Log(" %s", LocalScoreToStr(DPM(uPrefixLengthA, uPrefixLengthB)));\r
85                 Log("\n");\r
86                 }\r
87         }\r
88 \r
89 SCORE NWDASimple(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,\r
90   unsigned uLengthB, PWPath &Path)\r
91         {\r
92         assert(uLengthB > 0 && uLengthA > 0);\r
93 \r
94         const unsigned uPrefixCountA = uLengthA + 1;\r
95         const unsigned uPrefixCountB = uLengthB + 1;\r
96 \r
97 // Allocate DP matrices\r
98         const size_t LM = uPrefixCountA*uPrefixCountB;\r
99         SCORE *DPL_ = new SCORE[LM];\r
100         SCORE *DPM_ = new SCORE[LM];\r
101         SCORE *DPD_ = new SCORE[LM];\r
102         SCORE *DPE_ = new SCORE[LM];\r
103         SCORE *DPI_ = new SCORE[LM];\r
104         SCORE *DPJ_ = new SCORE[LM];\r
105 \r
106         char *TBM_ = new char[LM];\r
107         char *TBD_ = new char[LM];\r
108         char *TBE_ = new char[LM];\r
109         char *TBI_ = new char[LM];\r
110         char *TBJ_ = new char[LM];\r
111 \r
112         memset(TBM_, '?', LM);\r
113         memset(TBD_, '?', LM);\r
114         memset(TBE_, '?', LM);\r
115         memset(TBI_, '?', LM);\r
116         memset(TBJ_, '?', LM);\r
117 \r
118         DPM(0, 0) = 0;\r
119         DPD(0, 0) = MINUS_INFINITY;\r
120         DPE(0, 0) = MINUS_INFINITY;\r
121         DPI(0, 0) = MINUS_INFINITY;\r
122         DPJ(0, 0) = MINUS_INFINITY;\r
123 \r
124         DPM(1, 0) = MINUS_INFINITY;\r
125         DPD(1, 0) = PA[0].m_scoreGapOpen;\r
126         DPE(1, 0) = PA[0].m_scoreGapOpen2;\r
127         TBD(1, 0) = 'D';\r
128         TBE(1, 0) = 'E';\r
129         DPI(1, 0) = MINUS_INFINITY;\r
130         DPJ(1, 0) = MINUS_INFINITY;\r
131 \r
132         DPM(0, 1) = MINUS_INFINITY;\r
133         DPD(0, 1) = MINUS_INFINITY;\r
134         DPE(0, 1) = MINUS_INFINITY;\r
135         DPI(0, 1) = PB[0].m_scoreGapOpen;\r
136         DPJ(0, 1) = PB[0].m_scoreGapOpen2;\r
137         TBI(0, 1) = 'I';\r
138         TBJ(0, 1) = 'J';\r
139 \r
140 // Empty prefix of B is special case\r
141         for (unsigned uPrefixLengthA = 2; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)\r
142                 {\r
143                 DPM(uPrefixLengthA, 0) = MINUS_INFINITY;\r
144 \r
145                 DPD(uPrefixLengthA, 0) = DPD(uPrefixLengthA - 1, 0) + g_scoreGapExtend;\r
146                 DPE(uPrefixLengthA, 0) = DPE(uPrefixLengthA - 1, 0) + g_scoreGapExtend2;\r
147 \r
148                 TBD(uPrefixLengthA, 0) = 'D';\r
149                 TBE(uPrefixLengthA, 0) = 'E';\r
150 \r
151                 DPI(uPrefixLengthA, 0) = MINUS_INFINITY;\r
152                 DPJ(uPrefixLengthA, 0) = MINUS_INFINITY;\r
153                 }\r
154 \r
155 // Empty prefix of A is special case\r
156         for (unsigned uPrefixLengthB = 2; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)\r
157                 {\r
158                 DPM(0, uPrefixLengthB) = MINUS_INFINITY;\r
159 \r
160                 DPD(0, uPrefixLengthB) = MINUS_INFINITY;\r
161                 DPE(0, uPrefixLengthB) = MINUS_INFINITY;\r
162 \r
163                 DPI(0, uPrefixLengthB) = DPI(0, uPrefixLengthB - 1) + g_scoreGapExtend;\r
164                 DPJ(0, uPrefixLengthB) = DPJ(0, uPrefixLengthB - 1) + g_scoreGapExtend2;\r
165 \r
166                 TBI(0, uPrefixLengthB) = 'I';\r
167                 TBJ(0, uPrefixLengthB) = 'J';\r
168                 }\r
169 \r
170 // Special case to agree with NWFast, no D-I transitions so...\r
171         DPD(uLengthA, 0) = MINUS_INFINITY;\r
172         DPE(uLengthA, 0) = MINUS_INFINITY;\r
173 //      DPI(0, uLengthB) = MINUS_INFINITY;\r
174 //      DPJ(0, uLengthB) = MINUS_INFINITY;\r
175 \r
176 // ============\r
177 // Main DP loop\r
178 // ============\r
179         SCORE scoreGapCloseB = MINUS_INFINITY;\r
180         SCORE scoreGapClose2B = MINUS_INFINITY;\r
181         for (unsigned uPrefixLengthB = 1; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)\r
182                 {\r
183                 const ProfPos &PPB = PB[uPrefixLengthB - 1];\r
184 \r
185                 SCORE scoreGapCloseA = MINUS_INFINITY;\r
186                 SCORE scoreGapClose2A = MINUS_INFINITY;\r
187                 for (unsigned uPrefixLengthA = 1; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)\r
188                         {\r
189                         const ProfPos &PPA = PA[uPrefixLengthA - 1];\r
190 \r
191                         {\r
192                 // Match M=LetterA+LetterB\r
193                         SCORE scoreLL = ScoreProfPos2(PPA, PPB);\r
194                         DPL(uPrefixLengthA, uPrefixLengthB) = scoreLL;\r
195 \r
196                         SCORE scoreMM = DPM(uPrefixLengthA-1, uPrefixLengthB-1);\r
197                         SCORE scoreDM = DPD(uPrefixLengthA-1, uPrefixLengthB-1) + scoreGapCloseA;\r
198                         SCORE scoreEM = DPE(uPrefixLengthA-1, uPrefixLengthB-1) + scoreGapClose2A;\r
199                         SCORE scoreIM = DPI(uPrefixLengthA-1, uPrefixLengthB-1) + scoreGapCloseB;\r
200                         SCORE scoreJM = DPJ(uPrefixLengthA-1, uPrefixLengthB-1) + scoreGapClose2B;\r
201 \r
202                         SCORE scoreBest;\r
203                         if (scoreMM >= scoreDM && scoreMM >= scoreEM && scoreMM >= scoreIM && scoreMM >= scoreJM)\r
204                                 {\r
205                                 scoreBest = scoreMM;\r
206                                 TBM(uPrefixLengthA, uPrefixLengthB) = 'M';\r
207                                 }\r
208                         else if (scoreDM >= scoreMM && scoreDM >= scoreEM && scoreDM >= scoreIM && scoreDM >= scoreJM)\r
209                                 {\r
210                                 scoreBest = scoreDM;\r
211                                 TBM(uPrefixLengthA, uPrefixLengthB) = 'D';\r
212                                 }\r
213                         else if (scoreEM >= scoreMM && scoreEM >= scoreDM && scoreEM >= scoreIM && scoreEM >= scoreJM)\r
214                                 {\r
215                                 scoreBest = scoreEM;\r
216                                 TBM(uPrefixLengthA, uPrefixLengthB) = 'E';\r
217                                 }\r
218                         else if (scoreIM >= scoreMM && scoreIM >= scoreDM && scoreIM >= scoreEM && scoreIM >= scoreJM)\r
219                                 {\r
220                                 scoreBest = scoreIM;\r
221                                 TBM(uPrefixLengthA, uPrefixLengthB) = 'I';\r
222                                 }\r
223                         else\r
224                                 {\r
225                                 assert(scoreJM >= scoreMM && scoreJM >= scoreDM && scoreJM >= scoreEM && scoreJM >= scoreIM);\r
226                                 scoreBest = scoreJM;\r
227                                 TBM(uPrefixLengthA, uPrefixLengthB) = 'J';\r
228                                 }\r
229                         DPM(uPrefixLengthA, uPrefixLengthB) = scoreBest + scoreLL;\r
230                         }\r
231 \r
232                         {\r
233                 // Delete D=LetterA+GapB\r
234                         SCORE scoreMD = DPM(uPrefixLengthA-1, uPrefixLengthB) +\r
235                           PA[uPrefixLengthA-1].m_scoreGapOpen;\r
236                         SCORE scoreDD = DPD(uPrefixLengthA-1, uPrefixLengthB) + g_scoreGapExtend;\r
237 \r
238                         SCORE scoreBest;\r
239                         if (scoreMD >= scoreDD)\r
240                                 {\r
241                                 scoreBest = scoreMD;\r
242                                 TBD(uPrefixLengthA, uPrefixLengthB) = 'M';\r
243                                 }\r
244                         else\r
245                                 {\r
246                                 assert(scoreDD >= scoreMD);\r
247                                 scoreBest = scoreDD;\r
248                                 TBD(uPrefixLengthA, uPrefixLengthB) = 'D';\r
249                                 }\r
250                         DPD(uPrefixLengthA, uPrefixLengthB) = scoreBest;\r
251                         }\r
252 \r
253                         {\r
254                 // Delete E=LetterA+GapB\r
255                         SCORE scoreME = DPM(uPrefixLengthA-1, uPrefixLengthB) +\r
256                           PA[uPrefixLengthA-1].m_scoreGapOpen2;\r
257                         SCORE scoreEE = DPE(uPrefixLengthA-1, uPrefixLengthB) + g_scoreGapExtend2;\r
258 \r
259                         SCORE scoreBest;\r
260                         if (scoreME >= scoreEE)\r
261                                 {\r
262                                 scoreBest = scoreME;\r
263                                 TBE(uPrefixLengthA, uPrefixLengthB) = 'M';\r
264                                 }\r
265                         else\r
266                                 {\r
267                                 assert(scoreEE >= scoreME);\r
268                                 scoreBest = scoreEE;\r
269                                 TBE(uPrefixLengthA, uPrefixLengthB) = 'E';\r
270                                 }\r
271                         DPE(uPrefixLengthA, uPrefixLengthB) = scoreBest;\r
272                         }\r
273 \r
274                 // Insert I=GapA+LetterB\r
275                         {\r
276                         SCORE scoreMI = DPM(uPrefixLengthA, uPrefixLengthB-1) +\r
277                           PB[uPrefixLengthB - 1].m_scoreGapOpen;\r
278                         SCORE scoreII = DPI(uPrefixLengthA, uPrefixLengthB-1) + g_scoreGapExtend;\r
279 \r
280                         SCORE scoreBest;\r
281                         if (scoreMI >= scoreII)\r
282                                 {\r
283                                 scoreBest = scoreMI;\r
284                                 TBI(uPrefixLengthA, uPrefixLengthB) = 'M';\r
285                                 }\r
286                         else \r
287                                 {\r
288                                 assert(scoreII > scoreMI);\r
289                                 scoreBest = scoreII;\r
290                                 TBI(uPrefixLengthA, uPrefixLengthB) = 'I';\r
291                                 }\r
292                         DPI(uPrefixLengthA, uPrefixLengthB) = scoreBest;\r
293                         }\r
294 \r
295                 // Insert J=GapA+LetterB\r
296                         {\r
297                         SCORE scoreMJ = DPM(uPrefixLengthA, uPrefixLengthB-1) +\r
298                           PB[uPrefixLengthB - 1].m_scoreGapOpen2;\r
299                         SCORE scoreJJ = DPJ(uPrefixLengthA, uPrefixLengthB-1) + g_scoreGapExtend2;\r
300 \r
301                         SCORE scoreBest;\r
302                         if (scoreMJ >= scoreJJ)\r
303                                 {\r
304                                 scoreBest = scoreMJ;\r
305                                 TBJ(uPrefixLengthA, uPrefixLengthB) = 'M';\r
306                                 }\r
307                         else \r
308                                 {\r
309                                 assert(scoreJJ > scoreMJ);\r
310                                 scoreBest = scoreJJ;\r
311                                 TBJ(uPrefixLengthA, uPrefixLengthB) = 'J';\r
312                                 }\r
313                         DPJ(uPrefixLengthA, uPrefixLengthB) = scoreBest;\r
314                         }\r
315 \r
316                         scoreGapCloseA = PPA.m_scoreGapClose;\r
317                         scoreGapClose2A = PPA.m_scoreGapClose2;\r
318                         }\r
319                 scoreGapCloseB = PPB.m_scoreGapClose;\r
320                 scoreGapClose2B = PPB.m_scoreGapClose2;\r
321                 }\r
322 \r
323 #if TRACE\r
324         Log("\n");\r
325         Log("DA Simple DPL:\n");\r
326         ListDP(DPL_, PA, PB, uPrefixCountA, uPrefixCountB);\r
327         Log("\n");\r
328         Log("DA Simple DPM:\n");\r
329         ListDP(DPM_, PA, PB, uPrefixCountA, uPrefixCountB);\r
330         Log("\n");\r
331         Log("DA Simple DPD:\n");\r
332         ListDP(DPD_, PA, PB, uPrefixCountA, uPrefixCountB);\r
333         Log("\n");\r
334         Log("DA Simple DPE:\n");\r
335         ListDP(DPE_, PA, PB, uPrefixCountA, uPrefixCountB);\r
336         Log("\n");\r
337         Log("DA Simple DPI:\n");\r
338         ListDP(DPI_, PA, PB, uPrefixCountA, uPrefixCountB);\r
339         Log("\n");\r
340         Log("DA Simple DPJ:\n");\r
341         ListDP(DPJ_, PA, PB, uPrefixCountA, uPrefixCountB);\r
342         Log("\n");\r
343         Log("DA Simple TBM:\n");\r
344         ListTB(TBM_, PA, PB, uPrefixCountA, uPrefixCountB);\r
345         Log("\n");\r
346         Log("DA Simple TBD:\n");\r
347         ListTB(TBD_, PA, PB, uPrefixCountA, uPrefixCountB);\r
348         Log("\n");\r
349         Log("DA Simple TBE:\n");\r
350         ListTB(TBE_, PA, PB, uPrefixCountA, uPrefixCountB);\r
351         Log("\n");\r
352         Log("DA Simple TBI:\n");\r
353         ListTB(TBI_, PA, PB, uPrefixCountA, uPrefixCountB);\r
354         Log("\n");\r
355         Log("DA Simple TBJ:\n");\r
356         ListTB(TBJ_, PA, PB, uPrefixCountA, uPrefixCountB);\r
357 #endif\r
358 \r
359 // Trace-back\r
360 // ==========\r
361         Path.Clear();\r
362 \r
363 // Find last edge\r
364         SCORE M = DPM(uLengthA, uLengthB);\r
365         SCORE D = DPD(uLengthA, uLengthB) + PA[uLengthA-1].m_scoreGapClose;\r
366         SCORE E = DPE(uLengthA, uLengthB) + PA[uLengthA-1].m_scoreGapClose2;\r
367         SCORE I = DPI(uLengthA, uLengthB) + PB[uLengthB-1].m_scoreGapClose;\r
368         SCORE J = DPJ(uLengthA, uLengthB) + PB[uLengthB-1].m_scoreGapClose2;\r
369         char cEdgeType = '?';\r
370 \r
371         SCORE BestScore = M;\r
372         cEdgeType = 'M';\r
373         if (D > BestScore)\r
374                 {\r
375                 cEdgeType = 'D';\r
376                 BestScore = D;\r
377                 }\r
378         if (E > BestScore)\r
379                 {\r
380                 cEdgeType = 'E';\r
381                 BestScore = E;\r
382                 }\r
383         if (I > BestScore)\r
384                 {\r
385                 cEdgeType = 'I';\r
386                 BestScore = I;\r
387                 }\r
388         if (J > BestScore)\r
389                 {\r
390                 cEdgeType = 'J';\r
391                 BestScore = J;\r
392                 }\r
393 \r
394 #if     TRACE\r
395         Log("DA Simple: MAB=%.4g DAB=%.4g EAB=%.4g IAB=%.4g JAB=%.4g best=%c\n",\r
396           M, D, E, I, J, cEdgeType);\r
397 #endif\r
398 \r
399         unsigned PLA = uLengthA;\r
400         unsigned PLB = uLengthB;\r
401         for (;;)\r
402                 {\r
403                 PWEdge Edge;\r
404                 Edge.cType = XlatEdgeType(cEdgeType);\r
405                 Edge.uPrefixLengthA = PLA;\r
406                 Edge.uPrefixLengthB = PLB;\r
407 #if     TRACE\r
408                 Log("Prepend %c%d.%d\n", Edge.cType, PLA, PLB);\r
409 #endif\r
410                 Path.PrependEdge(Edge);\r
411 \r
412                 switch (cEdgeType)\r
413                         {\r
414                 case 'M':\r
415                         assert(PLA > 0);\r
416                         assert(PLB > 0);\r
417                         cEdgeType = TBM(PLA, PLB);\r
418                         --PLA;\r
419                         --PLB;\r
420                         break;\r
421 \r
422                 case 'D':\r
423                         assert(PLA > 0);\r
424                         cEdgeType = TBD(PLA, PLB);\r
425                         --PLA;\r
426                         break;\r
427 \r
428                 case 'E':\r
429                         assert(PLA > 0);\r
430                         cEdgeType = TBE(PLA, PLB);\r
431                         --PLA;\r
432                         break;\r
433 \r
434                 case 'I':\r
435                         assert(PLB > 0);\r
436                         cEdgeType = TBI(PLA, PLB);\r
437                         --PLB;\r
438                         break;\r
439                 \r
440                 case 'J':\r
441                         assert(PLB > 0);\r
442                         cEdgeType = TBJ(PLA, PLB);\r
443                         --PLB;\r
444                         break;\r
445 \r
446                 default:\r
447                         Quit("Invalid edge %c", cEdgeType);\r
448                         }\r
449                 if (0 == PLA && 0 == PLB)\r
450                         break;\r
451                 }\r
452         Path.Validate();\r
453 \r
454 //      SCORE Score = TraceBack(PA, uLengthA, PB, uLengthB, DPM_, DPD_, DPI_, Path);\r
455 \r
456 #if     TRACE\r
457         SCORE scorePath = FastScorePath2(PA, uLengthA, PB, uLengthB, Path);\r
458         Path.LogMe();\r
459         Log("Score = %s Path = %s\n", LocalScoreToStr(BestScore), LocalScoreToStr(scorePath));\r
460 #endif\r
461 \r
462         if (g_bKeepSimpleDP)\r
463                 {\r
464                 g_DPM = DPM_;\r
465                 g_DPD = DPD_;\r
466                 g_DPE = DPE_;\r
467                 g_DPI = DPI_;\r
468                 g_DPJ = DPJ_;\r
469 \r
470                 g_TBM = TBM_;\r
471                 g_TBD = TBD_;\r
472                 g_TBE = TBE_;\r
473                 g_TBI = TBI_;\r
474                 g_TBJ = TBJ_;\r
475                 }\r
476         else\r
477                 {\r
478                 delete[] DPM_;\r
479                 delete[] DPD_;\r
480                 delete[] DPE_;\r
481                 delete[] DPI_;\r
482                 delete[] DPJ_;\r
483 \r
484                 delete[] TBM_;\r
485                 delete[] TBD_;\r
486                 delete[] TBE_;\r
487                 delete[] TBI_;\r
488                 delete[] TBJ_;\r
489                 }\r
490 \r
491         return BestScore;\r
492         }\r
493 \r
494 #endif // DOUBLE_AFFINE\r