Next version of JABA
[jabaws.git] / binaries / src / muscle / aligngivenpath.cpp
1 #include "muscle.h"\r
2 #include "msa.h"\r
3 #include "pwpath.h"\r
4 #include "profile.h"\r
5 \r
6 #define TRACE   0\r
7 \r
8 static void LogPP(const ProfPos &PP)\r
9         {\r
10         Log("ResidueGroup   %u\n", PP.m_uResidueGroup);\r
11         Log("AllGaps      %d\n", PP.m_bAllGaps);\r
12         Log("Occ          %.3g\n", PP.m_fOcc);\r
13         Log("LL=%.3g LG=%.3g GL=%.3g GG=%.3g\n", PP.m_LL, PP.m_LG, PP.m_GL, PP.m_GG);\r
14         Log("Freqs        ");\r
15         for (unsigned i = 0; i < 20; ++i)\r
16                 if (PP.m_fcCounts[i] > 0)\r
17                         Log("%c=%.3g ", LetterToChar(i), PP.m_fcCounts[i]);\r
18         Log("\n");\r
19         }\r
20 \r
21 static void AssertProfPosEq(const ProfPos *PA, const ProfPos *PB, unsigned i)\r
22         {\r
23         const ProfPos &PPA = PA[i];\r
24         const ProfPos &PPB = PB[i];\r
25 #define eq(x)   if (PPA.m_##x != PPB.m_##x) { LogPP(PPA); LogPP(PPB); Quit("AssertProfPosEq." #x); }\r
26 #define be(x)   if (!BTEq(PPA.m_##x, PPB.m_##x)) { LogPP(PPA); LogPP(PPB); Quit("AssertProfPosEq." #x); }\r
27         eq(bAllGaps)\r
28         eq(uResidueGroup)\r
29 \r
30         be(LL)\r
31         be(LG)\r
32         be(GL)\r
33         be(GG)\r
34         be(fOcc)\r
35         be(scoreGapOpen)\r
36         be(scoreGapClose)\r
37 \r
38         for (unsigned j = 0; j < 20; ++j)\r
39                 {\r
40 #define eqj(x)  if (PPA.m_##x != PPB.m_##x) Quit("AssertProfPosEq j=%u " #x, j);\r
41 #define bej(x)  if (!BTEq(PPA.m_##x, PPB.m_##x)) Quit("AssertProfPosEq j=%u " #x, j);\r
42                 bej(fcCounts[j]);\r
43 //              eqj(uSortOrder[j]) // may differ due to ties, don't check?\r
44                 bej(AAScores[j])\r
45 #undef eqj\r
46 #undef bej\r
47                 }\r
48 #undef eq\r
49 #undef be\r
50         }\r
51 \r
52 void AssertProfsEq(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,\r
53   unsigned uLengthB)\r
54         {\r
55         if (uLengthA != uLengthB)\r
56                 Quit("AssertProfsEq: lengths differ %u %u", uLengthA, uLengthB);\r
57         for (unsigned i = 0; i < uLengthB; ++i)\r
58                 AssertProfPosEq(PA, PB, i);\r
59         }\r
60 \r
61 #if     DEBUG\r
62 static void ValidateProf(const ProfPos *Prof, unsigned uLength)\r
63         {\r
64         for (unsigned i = 0; i < uLength; ++i)\r
65                 {\r
66                 const ProfPos &PP = Prof[i];\r
67 \r
68                 FCOUNT s1 = PP.m_LL + PP.m_LG + PP.m_GL + PP.m_GG;\r
69                 assert(BTEq(s1, 1.0));\r
70 \r
71                 if (i > 0)\r
72                         {\r
73                         const ProfPos &PPPrev = Prof[i-1];\r
74                         FCOUNT s2 = PPPrev.m_LL + PPPrev.m_GL;\r
75                         FCOUNT s3 = PP.m_LL + PP.m_LG;\r
76                         assert(BTEq(s2, s3));\r
77                         }\r
78                 if (i < uLength - 1)\r
79                         {\r
80                         const ProfPos &PPNext = Prof[i+1];\r
81                         FCOUNT s4 = PP.m_LL + PP.m_GL;\r
82                         FCOUNT s5 = PPNext.m_LL + PPNext.m_LG;\r
83                         assert(BTEq(s4, s5));\r
84                         }\r
85                 }\r
86         }\r
87 #else\r
88 #define ValidateProf(Prof, Length)      /* empty */\r
89 #endif\r
90 \r
91 static void ScoresFromFreqsPos(ProfPos *Prof, unsigned uLength, unsigned uPos)\r
92         {\r
93         ProfPos &PP = Prof[uPos];\r
94         SortCounts(PP.m_fcCounts, PP.m_uSortOrder);\r
95         PP.m_uResidueGroup = ResidueGroupFromFCounts(PP.m_fcCounts);\r
96 \r
97 // "Occupancy"\r
98         PP.m_fOcc = PP.m_LL + PP.m_GL;\r
99 \r
100 // Frequency of gap-opens in this position (i)\r
101 // Gap open     = letter in i-1 and gap in i\r
102 //                              = iff LG in i\r
103         FCOUNT fcOpen = PP.m_LG;\r
104 \r
105 // Frequency of gap-closes in this position\r
106 // Gap close    = gap in i and letter in i+1\r
107 //                              = iff GL in i+1\r
108         FCOUNT fcClose;\r
109         if (uPos + 1 < uLength)\r
110                 fcClose = Prof[uPos + 1].m_GL;\r
111         else\r
112                 fcClose = PP.m_GG + PP.m_LG;\r
113 \r
114         PP.m_scoreGapOpen = (SCORE) ((1.0 - fcOpen)*g_scoreGapOpen/2.0);\r
115         PP.m_scoreGapClose = (SCORE) ((1.0 - fcClose)*g_scoreGapOpen/2.0);\r
116 #if     DOUBLE_AFFINE\r
117         PP.m_scoreGapOpen2 = (SCORE) ((1.0 - fcOpen)*g_scoreGapOpen2/2.0);\r
118         PP.m_scoreGapClose2 = (SCORE) ((1.0 - fcClose)*g_scoreGapOpen2/2.0);\r
119 #endif\r
120 \r
121         for (unsigned i = 0; i < g_AlphaSize; ++i)\r
122                 {\r
123                 SCORE scoreSum = 0;\r
124                 for (unsigned j = 0; j < g_AlphaSize; ++j)\r
125                         scoreSum += PP.m_fcCounts[j]*(*g_ptrScoreMatrix)[i][j];\r
126                 PP.m_AAScores[i] = scoreSum;\r
127                 }\r
128         }\r
129 \r
130 void ProfScoresFromFreqs(ProfPos *Prof, unsigned uLength)\r
131         {\r
132         for (unsigned i = 0; i < uLength; ++i)\r
133                 ScoresFromFreqsPos(Prof, uLength, i);\r
134         }\r
135 \r
136 static void AppendDelete(const MSA &msaA, unsigned &uColIndexA,\r
137   unsigned uSeqCountA, unsigned uSeqCountB, MSA &msaCombined,\r
138   unsigned &uColIndexCombined)\r
139         {\r
140 #if     TRACE\r
141         Log("AppendDelete ColIxA=%u ColIxCmb=%u\n",\r
142           uColIndexA, uColIndexCombined);\r
143 #endif\r
144         for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)\r
145                 {\r
146                 char c = msaA.GetChar(uSeqIndexA, uColIndexA);\r
147                 msaCombined.SetChar(uSeqIndexA, uColIndexCombined, c);\r
148                 }\r
149 \r
150         for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)\r
151                 msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, '-');\r
152 \r
153         ++uColIndexCombined;\r
154         ++uColIndexA;\r
155         }\r
156 \r
157 static void AppendInsert(const MSA &msaB, unsigned &uColIndexB,\r
158   unsigned uSeqCountA, unsigned uSeqCountB, MSA &msaCombined,\r
159   unsigned &uColIndexCombined)\r
160         {\r
161 #if     TRACE\r
162         Log("AppendInsert ColIxB=%u ColIxCmb=%u\n",\r
163           uColIndexB, uColIndexCombined);\r
164 #endif\r
165         for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)\r
166                 msaCombined.SetChar(uSeqIndexA, uColIndexCombined, '-');\r
167 \r
168         for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)\r
169                 {\r
170                 char c = msaB.GetChar(uSeqIndexB, uColIndexB);\r
171                 msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, c);\r
172                 }\r
173 \r
174         ++uColIndexCombined;\r
175         ++uColIndexB;\r
176         }\r
177 \r
178 static void AppendTplInserts(const MSA &msaA, unsigned &uColIndexA, unsigned uColCountA,\r
179   const MSA &msaB, unsigned &uColIndexB, unsigned uColCountB, unsigned uSeqCountA,\r
180   unsigned uSeqCountB, MSA &msaCombined, unsigned &uColIndexCombined)\r
181         {\r
182 #if     TRACE\r
183         Log("AppendTplInserts ColIxA=%u ColIxB=%u ColIxCmb=%u\n",\r
184           uColIndexA, uColIndexB, uColIndexCombined);\r
185 #endif\r
186         const unsigned uLengthA = msaA.GetColCount();\r
187         const unsigned uLengthB = msaB.GetColCount();\r
188 \r
189         unsigned uNewColCount = uColCountA;\r
190         if (uColCountB > uNewColCount)\r
191                 uNewColCount = uColCountB;\r
192 \r
193         for (unsigned n = 0; n < uColCountA; ++n)\r
194                 {\r
195                 for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)\r
196                         {\r
197                         char c = msaA.GetChar(uSeqIndexA, uColIndexA + n);\r
198                         c = UnalignChar(c);\r
199                         msaCombined.SetChar(uSeqIndexA, uColIndexCombined + n, c);\r
200                         }\r
201                 }\r
202         for (unsigned n = uColCountA; n < uNewColCount; ++n)\r
203                 {\r
204                 for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)\r
205                         msaCombined.SetChar(uSeqIndexA, uColIndexCombined + n, '.');\r
206                 }\r
207 \r
208         for (unsigned n = 0; n < uColCountB; ++n)\r
209                 {\r
210                 for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)\r
211                         {\r
212                         char c = msaB.GetChar(uSeqIndexB, uColIndexB + n);\r
213                         c = UnalignChar(c);\r
214                         msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined + n, c);\r
215                         }\r
216                 }\r
217         for (unsigned n = uColCountB; n < uNewColCount; ++n)\r
218                 {\r
219                 for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)\r
220                         msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined + n, '.');\r
221                 }\r
222 \r
223         uColIndexCombined += uNewColCount;\r
224         uColIndexA += uColCountA;\r
225         uColIndexB += uColCountB;\r
226         }\r
227 \r
228 static void AppendMatch(const MSA &msaA, unsigned &uColIndexA, const MSA &msaB,\r
229   unsigned &uColIndexB, unsigned uSeqCountA, unsigned uSeqCountB,\r
230   MSA &msaCombined, unsigned &uColIndexCombined)\r
231         {\r
232 #if     TRACE\r
233         Log("AppendMatch ColIxA=%u ColIxB=%u ColIxCmb=%u\n",\r
234           uColIndexA, uColIndexB, uColIndexCombined);\r
235 #endif\r
236 \r
237         for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)\r
238                 {\r
239                 char c = msaA.GetChar(uSeqIndexA, uColIndexA);\r
240                 msaCombined.SetChar(uSeqIndexA, uColIndexCombined, c);\r
241                 }\r
242 \r
243         for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)\r
244                 {\r
245                 char c = msaB.GetChar(uSeqIndexB, uColIndexB);\r
246                 msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, c);\r
247                 }\r
248 \r
249         ++uColIndexA;\r
250         ++uColIndexB;\r
251         ++uColIndexCombined;\r
252         }\r
253 \r
254 void AlignTwoMSAsGivenPath(const PWPath &Path, const MSA &msaA, const MSA &msaB,\r
255   MSA &msaCombined)\r
256         {\r
257         msaCombined.Clear();\r
258 \r
259 #if     TRACE\r
260         Log("FastAlignProfiles\n");\r
261         Log("Template A:\n");\r
262         msaA.LogMe();\r
263         Log("Template B:\n");\r
264         msaB.LogMe();\r
265 #endif\r
266 \r
267         const unsigned uColCountA = msaA.GetColCount();\r
268         const unsigned uColCountB = msaB.GetColCount();\r
269 \r
270         const unsigned uSeqCountA = msaA.GetSeqCount();\r
271         const unsigned uSeqCountB = msaB.GetSeqCount();\r
272 \r
273         msaCombined.SetSeqCount(uSeqCountA + uSeqCountB);\r
274 \r
275 // Copy sequence names into combined MSA\r
276         for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)\r
277                 {\r
278                 msaCombined.SetSeqName(uSeqIndexA, msaA.GetSeqName(uSeqIndexA));\r
279                 msaCombined.SetSeqId(uSeqIndexA, msaA.GetSeqId(uSeqIndexA));\r
280                 }\r
281 \r
282         for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)\r
283                 {\r
284                 msaCombined.SetSeqName(uSeqCountA + uSeqIndexB, msaB.GetSeqName(uSeqIndexB));\r
285                 msaCombined.SetSeqId(uSeqCountA + uSeqIndexB, msaB.GetSeqId(uSeqIndexB));\r
286                 }\r
287 \r
288         unsigned uColIndexA = 0;\r
289         unsigned uColIndexB = 0;\r
290         unsigned uColIndexCombined = 0;\r
291         const unsigned uEdgeCount = Path.GetEdgeCount();\r
292         for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)\r
293                 {\r
294                 const PWEdge &Edge = Path.GetEdge(uEdgeIndex);\r
295 #if     TRACE\r
296                 Log("\nEdge %u %c%u.%u\n",\r
297                   uEdgeIndex,\r
298                   Edge.cType,\r
299                   Edge.uPrefixLengthA,\r
300                   Edge.uPrefixLengthB);\r
301 #endif\r
302                 const char cType = Edge.cType;\r
303                 const unsigned uPrefixLengthA = Edge.uPrefixLengthA;\r
304                 unsigned uColCountA = 0;\r
305                 if (uPrefixLengthA > 0)\r
306                         {\r
307                         const unsigned uNodeIndexA = uPrefixLengthA - 1;\r
308                         const unsigned uTplColIndexA = uNodeIndexA;\r
309                         if (uTplColIndexA > uColIndexA)\r
310                                 uColCountA = uTplColIndexA - uColIndexA;\r
311                         }\r
312 \r
313                 const unsigned uPrefixLengthB = Edge.uPrefixLengthB;\r
314                 unsigned uColCountB = 0;\r
315                 if (uPrefixLengthB > 0)\r
316                         {\r
317                         const unsigned uNodeIndexB = uPrefixLengthB - 1;\r
318                         const unsigned uTplColIndexB = uNodeIndexB;\r
319                         if (uTplColIndexB > uColIndexB)\r
320                                 uColCountB = uTplColIndexB - uColIndexB;\r
321                         }\r
322 \r
323 // TODO: This code looks like a hangover from HMM estimation -- can we delete it?\r
324                 assert(uColCountA == 0);\r
325                 assert(uColCountB == 0);\r
326                 AppendTplInserts(msaA, uColIndexA, uColCountA, msaB, uColIndexB, uColCountB,\r
327                   uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);\r
328 \r
329                 switch (cType)\r
330                         {\r
331                 case 'M':\r
332                         {\r
333                         assert(uPrefixLengthA > 0);\r
334                         assert(uPrefixLengthB > 0);\r
335                         const unsigned uColA = uPrefixLengthA - 1;\r
336                         const unsigned uColB = uPrefixLengthB - 1;\r
337                         assert(uColIndexA == uColA);\r
338                         assert(uColIndexB == uColB);\r
339                         AppendMatch(msaA, uColIndexA, msaB, uColIndexB, uSeqCountA, uSeqCountB,\r
340                           msaCombined, uColIndexCombined);\r
341                         break;\r
342                         }\r
343                 case 'D':\r
344                         {\r
345                         assert(uPrefixLengthA > 0);\r
346                         const unsigned uColA = uPrefixLengthA - 1;\r
347                         assert(uColIndexA == uColA);\r
348                         AppendDelete(msaA, uColIndexA, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);\r
349                         break;\r
350                         }\r
351                 case 'I':\r
352                         {\r
353                         assert(uPrefixLengthB > 0);\r
354                         const unsigned uColB = uPrefixLengthB - 1;\r
355                         assert(uColIndexB == uColB);\r
356                         AppendInsert(msaB, uColIndexB, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);\r
357                         break;\r
358                         }\r
359                 default:\r
360                         assert(false);\r
361                         }\r
362                 }\r
363         unsigned uInsertColCountA = uColCountA - uColIndexA;\r
364         unsigned uInsertColCountB = uColCountB - uColIndexB;\r
365 \r
366 // TODO: This code looks like a hangover from HMM estimation -- can we delete it?\r
367         assert(uInsertColCountA == 0);\r
368         assert(uInsertColCountB == 0);\r
369         AppendTplInserts(msaA, uColIndexA, uInsertColCountA, msaB, uColIndexB,\r
370           uInsertColCountB, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);\r
371 \r
372         assert(msaCombined.GetColCount() == uEdgeCount);\r
373         }\r
374 \r
375 static const ProfPos PPStart =\r
376         {\r
377         false,          //m_bAllGaps;\r
378         { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // m_uSortOrder[21];\r
379         { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // m_fcCounts[20];\r
380         1.0,    // m_LL;\r
381         0.0,    // m_LG;\r
382         0.0,    // m_GL;\r
383         0.0,    // m_GG;\r
384         { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // m_ALScores\r
385         0,              // m_uResidueGroup;\r
386         1.0,    // m_fOcc;\r
387         0.0,    // m_fcStartOcc;\r
388         0.0,    // m_fcEndOcc;\r
389         0.0,    // m_scoreGapOpen;\r
390         0.0,    // m_scoreGapClose;\r
391         };\r
392 \r
393 // MM\r
394 //  Ai\961        Ai              Out\r
395 //  X           X       LL      LL\r
396 //  X           -       LG      LG\r
397 //  -           X       GL      GL\r
398 //  -           -       GG      GG\r
399 //  \r
400 //  Bj\961        Bj\r
401 //  X           X       LL      LL\r
402 //  X           -       LG      LG\r
403 //  -           X       GL      GL\r
404 //  -           -       GG      GG\r
405 static void SetGapsMM(\r
406   const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,\r
407   const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,\r
408   ProfPos *POut, unsigned uColIndexOut)\r
409         {\r
410         const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;\r
411         const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;\r
412         ProfPos &PPO = POut[uColIndexOut];\r
413 \r
414         PPO.m_LL = wA*PPA.m_LL + wB*PPB.m_LL;\r
415         PPO.m_LG = wA*PPA.m_LG + wB*PPB.m_LG;\r
416         PPO.m_GL = wA*PPA.m_GL + wB*PPB.m_GL;\r
417         PPO.m_GG = wA*PPA.m_GG + wB*PPB.m_GG;\r
418         }\r
419 \r
420 // MD\r
421 //  Ai\961        Ai              Out\r
422 //  X           X       LL      LL\r
423 //  X           -       LG      LG\r
424 //  -           X       GL      GL\r
425 //  -           -       GG      GG\r
426 //  \r
427 //  Bj          (-)\r
428 //  X           -       ?L      LG\r
429 //  -           -       ?G      GG\r
430 static void SetGapsMD(\r
431   const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,\r
432   const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,\r
433   ProfPos *POut, unsigned uColIndexOut)\r
434         {\r
435         const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;\r
436         const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;\r
437         ProfPos &PPO = POut[uColIndexOut];\r
438 \r
439         PPO.m_LL = wA*PPA.m_LL;\r
440         PPO.m_LG = wA*PPA.m_LG + wB*(PPB.m_LL + PPB.m_GL);\r
441         PPO.m_GL = wA*PPA.m_GL;\r
442         PPO.m_GG = wA*PPA.m_GG + wB*(PPB.m_LG + PPB.m_GG);\r
443         }\r
444 \r
445 // DD\r
446 //  Ai\961        Ai              Out\r
447 //  X           X       LL      LL\r
448 //  X           -       LG      LG\r
449 //  -           X       GL      GL\r
450 //  -           -       GG      GG\r
451 //  \r
452 //  (-)         (-)\r
453 //  -           -       ??      GG\r
454 static void SetGapsDD(\r
455   const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,\r
456   const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,\r
457   ProfPos *POut, unsigned uColIndexOut)\r
458         {\r
459         const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;\r
460         ProfPos &PPO = POut[uColIndexOut];\r
461 \r
462         PPO.m_LL = wA*PPA.m_LL;\r
463         PPO.m_LG = wA*PPA.m_LG;\r
464         PPO.m_GL = wA*PPA.m_GL;\r
465         PPO.m_GG = wA*PPA.m_GG + wB;\r
466         }\r
467 \r
468 // MI\r
469 //  Ai          (-)             Out\r
470 //  X           -       ?L      LG\r
471 //  -           -       ?G      GG\r
472 \r
473 //  Bj\961        Bj\r
474 //  X           X       LL      LL\r
475 //  X           -       LG      LG\r
476 //  -           X       GL      GL\r
477 //  -           -       GG      GG\r
478 static void SetGapsMI(\r
479   const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,\r
480   const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,\r
481   ProfPos *POut, unsigned uColIndexOut)\r
482         {\r
483         const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;\r
484         const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;\r
485         ProfPos &PPO = POut[uColIndexOut];\r
486 \r
487         PPO.m_LL = wB*PPB.m_LL;\r
488         PPO.m_LG = wB*PPB.m_LG + wA*(PPA.m_LL + PPA.m_GL);\r
489         PPO.m_GL = wB*PPB.m_GL;\r
490         PPO.m_GG = wB*PPB.m_GG + wA*(PPA.m_LG + PPA.m_GG);\r
491         }\r
492 \r
493 // DM\r
494 //  Ai\961        Ai              Out\r
495 //  X           X       LL      LL\r
496 //  X           -       LG      LG\r
497 //  -           X       GL      GL\r
498 //  -           -       GG      GG\r
499 //  \r
500 //  (-)         Bj              \r
501 //  -           X               ?L      GL\r
502 //  -           -               ?G      GG\r
503 static void SetGapsDM(\r
504   const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,\r
505   const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,\r
506   ProfPos *POut, unsigned uColIndexOut)\r
507         {\r
508         const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;\r
509         const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;\r
510         ProfPos &PPO = POut[uColIndexOut];\r
511 \r
512         PPO.m_LL = wA*PPA.m_LL;\r
513         PPO.m_LG = wA*PPA.m_LG;\r
514         PPO.m_GL = wA*PPA.m_GL + wB*(PPB.m_LL + PPB.m_GL);\r
515         PPO.m_GG = wA*PPA.m_GG + wB*(PPB.m_LG + PPB.m_GG);\r
516         }\r
517 \r
518 // IM\r
519 //  (-)         Ai              Out             \r
520 //  -           X       ?L      GL\r
521 //  -           -       ?G      GG\r
522 \r
523 //  Bj\961        Bj\r
524 //  X           X       LL      LL\r
525 //  X           -       LG      LG\r
526 //  -           X       GL      GL\r
527 //  -           -       GG      GG\r
528 static void SetGapsIM(\r
529   const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,\r
530   const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,\r
531   ProfPos *POut, unsigned uColIndexOut)\r
532         {\r
533         const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;\r
534         const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;\r
535         ProfPos &PPO = POut[uColIndexOut];\r
536 \r
537         PPO.m_LL = wB*PPB.m_LL;\r
538         PPO.m_LG = wB*PPB.m_LG;\r
539         PPO.m_GL = wB*PPB.m_GL + wA*(PPA.m_LL + PPA.m_GL);\r
540         PPO.m_GG = wB*PPB.m_GG + wA*(PPA.m_LG + PPA.m_GG);\r
541         }\r
542 \r
543 // ID\r
544 //  (-)         Ai              Out\r
545 //  -           X       ?L      GL\r
546 //  -           -       ?G      GG\r
547 \r
548 //  Bj          (-)\r
549 //  X           -       ?L      LG\r
550 //  -           -       ?G      GG\r
551 static void SetGapsID(\r
552   const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,\r
553   const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,\r
554   ProfPos *POut, unsigned uColIndexOut)\r
555         {\r
556         const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;\r
557         const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;\r
558         ProfPos &PPO = POut[uColIndexOut];\r
559 \r
560         PPO.m_LL = 0;\r
561         PPO.m_LG = wB*PPB.m_GL + wB*PPB.m_LL;\r
562         PPO.m_GL = wA*PPA.m_GL + wA*PPA.m_LL;\r
563         PPO.m_GG = wA*(PPA.m_LG + PPA.m_GG) + wB*(PPB.m_LG + PPB.m_GG);\r
564         }\r
565 \r
566 // DI\r
567 //  Ai          (-)             Out\r
568 //  X           -       ?L      LG\r
569 //  -           -       ?G      GG\r
570 \r
571 //  (-)         Bj\r
572 //  -           X       ?L      GL\r
573 //  -           -       ?G      GG\r
574 static void SetGapsDI(\r
575   const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,\r
576   const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,\r
577   ProfPos *POut, unsigned uColIndexOut)\r
578         {\r
579         const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;\r
580         const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;\r
581         ProfPos &PPO = POut[uColIndexOut];\r
582 \r
583         PPO.m_LL = 0;\r
584         PPO.m_LG = wA*PPA.m_GL + wA*PPA.m_LL;\r
585         PPO.m_GL = wB*PPB.m_GL + wB*PPB.m_LL;\r
586         PPO.m_GG = wA*(PPA.m_LG + PPA.m_GG) + wB*(PPB.m_LG + PPB.m_GG);\r
587         }\r
588 \r
589 // II\r
590 //  (-)         (-)             Out\r
591 //  -           -       ??      GG\r
592 \r
593 //  Bj\961        Bj\r
594 //  X           X       LL      LL\r
595 //  X           -       LG      LG\r
596 //  -           X       GL      GL\r
597 //  -           -       GG      GG\r
598 static void SetGapsII(\r
599   const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,\r
600   const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,\r
601   ProfPos *POut, unsigned uColIndexOut)\r
602         {\r
603         const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;\r
604         ProfPos &PPO = POut[uColIndexOut];\r
605 \r
606         PPO.m_LL = wB*PPB.m_LL;\r
607         PPO.m_LG = wB*PPB.m_LG;\r
608         PPO.m_GL = wB*PPB.m_GL;\r
609         PPO.m_GG = wB*PPB.m_GG + wA;\r
610         }\r
611 \r
612 static void SetFreqs(\r
613   const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,\r
614   const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,\r
615   ProfPos *POut, unsigned uColIndexOut)\r
616         {\r
617         const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;\r
618         const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;\r
619         ProfPos &PPO = POut[uColIndexOut];\r
620 \r
621         if (g_bNormalizeCounts)\r
622                 {\r
623                 const FCOUNT fA = PPA.m_fOcc*wA/(wA + wB);\r
624                 const FCOUNT fB = PPB.m_fOcc*wB/(wA + wB);\r
625                 FCOUNT fTotal = 0;\r
626                 for (unsigned i = 0; i < 20; ++i)\r
627                         {\r
628                         const FCOUNT f = fA*PPA.m_fcCounts[i] + fB*PPB.m_fcCounts[i];\r
629                         PPO.m_fcCounts[i] = f;\r
630                         fTotal += f;\r
631                         }\r
632                 if (fTotal > 0)\r
633                         for (unsigned i = 0; i < 20; ++i)\r
634                                 PPO.m_fcCounts[i] /= fTotal;\r
635                 }\r
636         else\r
637                 {\r
638                 for (unsigned i = 0; i < 20; ++i)\r
639                         PPO.m_fcCounts[i] = wA*PPA.m_fcCounts[i] + wB*PPB.m_fcCounts[i];\r
640                 }\r
641         }\r
642 \r
643 void AlignTwoProfsGivenPath(const PWPath &Path,\r
644   const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,\r
645   const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,\r
646   ProfPos **ptrPOut, unsigned *ptruLengthOut)\r
647         {\r
648 #if     TRACE\r
649         Log("AlignTwoProfsGivenPath wA=%.3g wB=%.3g Path=\n", wA, wB);\r
650         Path.LogMe();\r
651 #endif\r
652         assert(BTEq(wA + wB, 1.0));\r
653 \r
654         unsigned uColIndexA = 0;\r
655         unsigned uColIndexB = 0;\r
656         unsigned uColIndexOut = 0;\r
657         const unsigned uEdgeCount = Path.GetEdgeCount();\r
658         ProfPos *POut = new ProfPos[uEdgeCount];\r
659         char cPrevType = 'M';\r
660         for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)\r
661                 {\r
662                 const PWEdge &Edge = Path.GetEdge(uEdgeIndex);\r
663                 const char cType = Edge.cType;\r
664 \r
665                 const unsigned uPrefixLengthA = Edge.uPrefixLengthA;\r
666                 const unsigned uPrefixLengthB = Edge.uPrefixLengthB;\r
667 \r
668 #if     TRACE\r
669                 Log("\nEdge %u %c%u.%u ColA=%u ColB=%u\n",\r
670                   uEdgeIndex,\r
671                   Edge.cType,\r
672                   Edge.uPrefixLengthA,\r
673                   Edge.uPrefixLengthB,\r
674                   uColIndexA,\r
675                   uColIndexB);\r
676 #endif\r
677 \r
678                 POut[uColIndexOut].m_bAllGaps = false;\r
679                 switch (cType)\r
680                         {\r
681                 case 'M':\r
682                         {\r
683                         assert(uPrefixLengthA > 0);\r
684                         assert(uPrefixLengthB > 0);\r
685                         SetFreqs(\r
686                           PA, uPrefixLengthA, wA,\r
687                           PB, uPrefixLengthB, wB,\r
688                           POut, uColIndexOut);\r
689                         switch (cPrevType)\r
690                                 {\r
691                         case 'M':\r
692                                 SetGapsMM(\r
693                                   PA, uPrefixLengthA, wA,\r
694                                   PB, uPrefixLengthB, wB,\r
695                                   POut, uColIndexOut);\r
696                           break;\r
697                         case 'D':\r
698                                 SetGapsDM(\r
699                                   PA, uPrefixLengthA, wA,\r
700                                   PB, uPrefixLengthB, wB,\r
701                                   POut, uColIndexOut);\r
702                                 break;\r
703                         case 'I':\r
704                                 SetGapsIM(\r
705                                   PA, uPrefixLengthA, wA,\r
706                                   PB, uPrefixLengthB, wB,\r
707                                   POut, uColIndexOut);\r
708                                 break;\r
709                         default:\r
710                                 Quit("Bad cPrevType");\r
711                                 }\r
712                         ++uColIndexA;\r
713                         ++uColIndexB;\r
714                         ++uColIndexOut;\r
715                         break;\r
716                         }\r
717                 case 'D':\r
718                         {\r
719                         assert(uPrefixLengthA > 0);\r
720                         SetFreqs(\r
721                           PA, uPrefixLengthA, wA,\r
722                           PB, uPrefixLengthB, 0,\r
723                           POut, uColIndexOut);\r
724                         switch (cPrevType)\r
725                                 {\r
726                         case 'M':\r
727                                 SetGapsMD(\r
728                                   PA, uPrefixLengthA, wA,\r
729                                   PB, uPrefixLengthB, wB,\r
730                                   POut, uColIndexOut);\r
731                           break;\r
732                         case 'D':\r
733                                 SetGapsDD(\r
734                                   PA, uPrefixLengthA, wA,\r
735                                   PB, uPrefixLengthB, wB,\r
736                                   POut, uColIndexOut);\r
737                                 break;\r
738                         case 'I':\r
739                                 SetGapsID(\r
740                                   PA, uPrefixLengthA, wA,\r
741                                   PB, uPrefixLengthB, wB,\r
742                                   POut, uColIndexOut);\r
743                                 break;\r
744                         default:\r
745                                 Quit("Bad cPrevType");\r
746                                 }\r
747                         ++uColIndexA;\r
748                         ++uColIndexOut;\r
749                         break;\r
750                         }\r
751                 case 'I':\r
752                         {\r
753                         assert(uPrefixLengthB > 0);\r
754                         SetFreqs(\r
755                           PA, uPrefixLengthA, 0,\r
756                           PB, uPrefixLengthB, wB,\r
757                           POut, uColIndexOut);\r
758                         switch (cPrevType)\r
759                                 {\r
760                         case 'M':\r
761                                 SetGapsMI(\r
762                                   PA, uPrefixLengthA, wA,\r
763                                   PB, uPrefixLengthB, wB,\r
764                                   POut, uColIndexOut);\r
765                           break;\r
766                         case 'D':\r
767                                 SetGapsDI(\r
768                                   PA, uPrefixLengthA, wA,\r
769                                   PB, uPrefixLengthB, wB,\r
770                                   POut, uColIndexOut);\r
771                                 break;\r
772                         case 'I':\r
773                                 SetGapsII(\r
774                                   PA, uPrefixLengthA, wA,\r
775                                   PB, uPrefixLengthB, wB,\r
776                                   POut, uColIndexOut);\r
777                                 break;\r
778                         default:\r
779                                 Quit("Bad cPrevType");\r
780                                 }\r
781                         ++uColIndexB;\r
782                         ++uColIndexOut;\r
783                         break;\r
784                         }\r
785                 default:\r
786                         assert(false);\r
787                         }\r
788                 cPrevType = cType;\r
789                 }\r
790         assert(uColIndexOut == uEdgeCount);\r
791 \r
792         ProfScoresFromFreqs(POut, uEdgeCount);\r
793         ValidateProf(POut, uEdgeCount);\r
794 \r
795         *ptrPOut = POut;\r
796         *ptruLengthOut = uEdgeCount;\r
797 \r
798 #if     TRACE\r
799         Log("AlignTwoProfsGivenPath:\n");\r
800         ListProfile(POut, uEdgeCount, 0);\r
801 #endif\r
802         }\r