--- /dev/null
+#include "muscle.h"\r
+#include "msa.h"\r
+#include "pwpath.h"\r
+#include "profile.h"\r
+\r
+#define TRACE 0\r
+\r
+static void LogPP(const ProfPos &PP)\r
+ {\r
+ Log("ResidueGroup %u\n", PP.m_uResidueGroup);\r
+ Log("AllGaps %d\n", PP.m_bAllGaps);\r
+ Log("Occ %.3g\n", PP.m_fOcc);\r
+ Log("LL=%.3g LG=%.3g GL=%.3g GG=%.3g\n", PP.m_LL, PP.m_LG, PP.m_GL, PP.m_GG);\r
+ Log("Freqs ");\r
+ for (unsigned i = 0; i < 20; ++i)\r
+ if (PP.m_fcCounts[i] > 0)\r
+ Log("%c=%.3g ", LetterToChar(i), PP.m_fcCounts[i]);\r
+ Log("\n");\r
+ }\r
+\r
+static void AssertProfPosEq(const ProfPos *PA, const ProfPos *PB, unsigned i)\r
+ {\r
+ const ProfPos &PPA = PA[i];\r
+ const ProfPos &PPB = PB[i];\r
+#define eq(x) if (PPA.m_##x != PPB.m_##x) { LogPP(PPA); LogPP(PPB); Quit("AssertProfPosEq." #x); }\r
+#define be(x) if (!BTEq(PPA.m_##x, PPB.m_##x)) { LogPP(PPA); LogPP(PPB); Quit("AssertProfPosEq." #x); }\r
+ eq(bAllGaps)\r
+ eq(uResidueGroup)\r
+\r
+ be(LL)\r
+ be(LG)\r
+ be(GL)\r
+ be(GG)\r
+ be(fOcc)\r
+ be(scoreGapOpen)\r
+ be(scoreGapClose)\r
+\r
+ for (unsigned j = 0; j < 20; ++j)\r
+ {\r
+#define eqj(x) if (PPA.m_##x != PPB.m_##x) Quit("AssertProfPosEq j=%u " #x, j);\r
+#define bej(x) if (!BTEq(PPA.m_##x, PPB.m_##x)) Quit("AssertProfPosEq j=%u " #x, j);\r
+ bej(fcCounts[j]);\r
+// eqj(uSortOrder[j]) // may differ due to ties, don't check?\r
+ bej(AAScores[j])\r
+#undef eqj\r
+#undef bej\r
+ }\r
+#undef eq\r
+#undef be\r
+ }\r
+\r
+void AssertProfsEq(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,\r
+ unsigned uLengthB)\r
+ {\r
+ if (uLengthA != uLengthB)\r
+ Quit("AssertProfsEq: lengths differ %u %u", uLengthA, uLengthB);\r
+ for (unsigned i = 0; i < uLengthB; ++i)\r
+ AssertProfPosEq(PA, PB, i);\r
+ }\r
+\r
+#if DEBUG\r
+static void ValidateProf(const ProfPos *Prof, unsigned uLength)\r
+ {\r
+ for (unsigned i = 0; i < uLength; ++i)\r
+ {\r
+ const ProfPos &PP = Prof[i];\r
+\r
+ FCOUNT s1 = PP.m_LL + PP.m_LG + PP.m_GL + PP.m_GG;\r
+ assert(BTEq(s1, 1.0));\r
+\r
+ if (i > 0)\r
+ {\r
+ const ProfPos &PPPrev = Prof[i-1];\r
+ FCOUNT s2 = PPPrev.m_LL + PPPrev.m_GL;\r
+ FCOUNT s3 = PP.m_LL + PP.m_LG;\r
+ assert(BTEq(s2, s3));\r
+ }\r
+ if (i < uLength - 1)\r
+ {\r
+ const ProfPos &PPNext = Prof[i+1];\r
+ FCOUNT s4 = PP.m_LL + PP.m_GL;\r
+ FCOUNT s5 = PPNext.m_LL + PPNext.m_LG;\r
+ assert(BTEq(s4, s5));\r
+ }\r
+ }\r
+ }\r
+#else\r
+#define ValidateProf(Prof, Length) /* empty */\r
+#endif\r
+\r
+static void ScoresFromFreqsPos(ProfPos *Prof, unsigned uLength, unsigned uPos)\r
+ {\r
+ ProfPos &PP = Prof[uPos];\r
+ SortCounts(PP.m_fcCounts, PP.m_uSortOrder);\r
+ PP.m_uResidueGroup = ResidueGroupFromFCounts(PP.m_fcCounts);\r
+\r
+// "Occupancy"\r
+ PP.m_fOcc = PP.m_LL + PP.m_GL;\r
+\r
+// Frequency of gap-opens in this position (i)\r
+// Gap open = letter in i-1 and gap in i\r
+// = iff LG in i\r
+ FCOUNT fcOpen = PP.m_LG;\r
+\r
+// Frequency of gap-closes in this position\r
+// Gap close = gap in i and letter in i+1\r
+// = iff GL in i+1\r
+ FCOUNT fcClose;\r
+ if (uPos + 1 < uLength)\r
+ fcClose = Prof[uPos + 1].m_GL;\r
+ else\r
+ fcClose = PP.m_GG + PP.m_LG;\r
+\r
+ PP.m_scoreGapOpen = (SCORE) ((1.0 - fcOpen)*g_scoreGapOpen/2.0);\r
+ PP.m_scoreGapClose = (SCORE) ((1.0 - fcClose)*g_scoreGapOpen/2.0);\r
+#if DOUBLE_AFFINE\r
+ PP.m_scoreGapOpen2 = (SCORE) ((1.0 - fcOpen)*g_scoreGapOpen2/2.0);\r
+ PP.m_scoreGapClose2 = (SCORE) ((1.0 - fcClose)*g_scoreGapOpen2/2.0);\r
+#endif\r
+\r
+ for (unsigned i = 0; i < g_AlphaSize; ++i)\r
+ {\r
+ SCORE scoreSum = 0;\r
+ for (unsigned j = 0; j < g_AlphaSize; ++j)\r
+ scoreSum += PP.m_fcCounts[j]*(*g_ptrScoreMatrix)[i][j];\r
+ PP.m_AAScores[i] = scoreSum;\r
+ }\r
+ }\r
+\r
+void ProfScoresFromFreqs(ProfPos *Prof, unsigned uLength)\r
+ {\r
+ for (unsigned i = 0; i < uLength; ++i)\r
+ ScoresFromFreqsPos(Prof, uLength, i);\r
+ }\r
+\r
+static void AppendDelete(const MSA &msaA, unsigned &uColIndexA,\r
+ unsigned uSeqCountA, unsigned uSeqCountB, MSA &msaCombined,\r
+ unsigned &uColIndexCombined)\r
+ {\r
+#if TRACE\r
+ Log("AppendDelete ColIxA=%u ColIxCmb=%u\n",\r
+ uColIndexA, uColIndexCombined);\r
+#endif\r
+ for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)\r
+ {\r
+ char c = msaA.GetChar(uSeqIndexA, uColIndexA);\r
+ msaCombined.SetChar(uSeqIndexA, uColIndexCombined, c);\r
+ }\r
+\r
+ for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)\r
+ msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, '-');\r
+\r
+ ++uColIndexCombined;\r
+ ++uColIndexA;\r
+ }\r
+\r
+static void AppendInsert(const MSA &msaB, unsigned &uColIndexB,\r
+ unsigned uSeqCountA, unsigned uSeqCountB, MSA &msaCombined,\r
+ unsigned &uColIndexCombined)\r
+ {\r
+#if TRACE\r
+ Log("AppendInsert ColIxB=%u ColIxCmb=%u\n",\r
+ uColIndexB, uColIndexCombined);\r
+#endif\r
+ for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)\r
+ msaCombined.SetChar(uSeqIndexA, uColIndexCombined, '-');\r
+\r
+ for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)\r
+ {\r
+ char c = msaB.GetChar(uSeqIndexB, uColIndexB);\r
+ msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, c);\r
+ }\r
+\r
+ ++uColIndexCombined;\r
+ ++uColIndexB;\r
+ }\r
+\r
+static void AppendTplInserts(const MSA &msaA, unsigned &uColIndexA, unsigned uColCountA,\r
+ const MSA &msaB, unsigned &uColIndexB, unsigned uColCountB, unsigned uSeqCountA,\r
+ unsigned uSeqCountB, MSA &msaCombined, unsigned &uColIndexCombined)\r
+ {\r
+#if TRACE\r
+ Log("AppendTplInserts ColIxA=%u ColIxB=%u ColIxCmb=%u\n",\r
+ uColIndexA, uColIndexB, uColIndexCombined);\r
+#endif\r
+ const unsigned uLengthA = msaA.GetColCount();\r
+ const unsigned uLengthB = msaB.GetColCount();\r
+\r
+ unsigned uNewColCount = uColCountA;\r
+ if (uColCountB > uNewColCount)\r
+ uNewColCount = uColCountB;\r
+\r
+ for (unsigned n = 0; n < uColCountA; ++n)\r
+ {\r
+ for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)\r
+ {\r
+ char c = msaA.GetChar(uSeqIndexA, uColIndexA + n);\r
+ c = UnalignChar(c);\r
+ msaCombined.SetChar(uSeqIndexA, uColIndexCombined + n, c);\r
+ }\r
+ }\r
+ for (unsigned n = uColCountA; n < uNewColCount; ++n)\r
+ {\r
+ for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)\r
+ msaCombined.SetChar(uSeqIndexA, uColIndexCombined + n, '.');\r
+ }\r
+\r
+ for (unsigned n = 0; n < uColCountB; ++n)\r
+ {\r
+ for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)\r
+ {\r
+ char c = msaB.GetChar(uSeqIndexB, uColIndexB + n);\r
+ c = UnalignChar(c);\r
+ msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined + n, c);\r
+ }\r
+ }\r
+ for (unsigned n = uColCountB; n < uNewColCount; ++n)\r
+ {\r
+ for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)\r
+ msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined + n, '.');\r
+ }\r
+\r
+ uColIndexCombined += uNewColCount;\r
+ uColIndexA += uColCountA;\r
+ uColIndexB += uColCountB;\r
+ }\r
+\r
+static void AppendMatch(const MSA &msaA, unsigned &uColIndexA, const MSA &msaB,\r
+ unsigned &uColIndexB, unsigned uSeqCountA, unsigned uSeqCountB,\r
+ MSA &msaCombined, unsigned &uColIndexCombined)\r
+ {\r
+#if TRACE\r
+ Log("AppendMatch ColIxA=%u ColIxB=%u ColIxCmb=%u\n",\r
+ uColIndexA, uColIndexB, uColIndexCombined);\r
+#endif\r
+\r
+ for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)\r
+ {\r
+ char c = msaA.GetChar(uSeqIndexA, uColIndexA);\r
+ msaCombined.SetChar(uSeqIndexA, uColIndexCombined, c);\r
+ }\r
+\r
+ for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)\r
+ {\r
+ char c = msaB.GetChar(uSeqIndexB, uColIndexB);\r
+ msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, c);\r
+ }\r
+\r
+ ++uColIndexA;\r
+ ++uColIndexB;\r
+ ++uColIndexCombined;\r
+ }\r
+\r
+void AlignTwoMSAsGivenPath(const PWPath &Path, const MSA &msaA, const MSA &msaB,\r
+ MSA &msaCombined)\r
+ {\r
+ msaCombined.Clear();\r
+\r
+#if TRACE\r
+ Log("FastAlignProfiles\n");\r
+ Log("Template A:\n");\r
+ msaA.LogMe();\r
+ Log("Template B:\n");\r
+ msaB.LogMe();\r
+#endif\r
+\r
+ const unsigned uColCountA = msaA.GetColCount();\r
+ const unsigned uColCountB = msaB.GetColCount();\r
+\r
+ const unsigned uSeqCountA = msaA.GetSeqCount();\r
+ const unsigned uSeqCountB = msaB.GetSeqCount();\r
+\r
+ msaCombined.SetSeqCount(uSeqCountA + uSeqCountB);\r
+\r
+// Copy sequence names into combined MSA\r
+ for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)\r
+ {\r
+ msaCombined.SetSeqName(uSeqIndexA, msaA.GetSeqName(uSeqIndexA));\r
+ msaCombined.SetSeqId(uSeqIndexA, msaA.GetSeqId(uSeqIndexA));\r
+ }\r
+\r
+ for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)\r
+ {\r
+ msaCombined.SetSeqName(uSeqCountA + uSeqIndexB, msaB.GetSeqName(uSeqIndexB));\r
+ msaCombined.SetSeqId(uSeqCountA + uSeqIndexB, msaB.GetSeqId(uSeqIndexB));\r
+ }\r
+\r
+ unsigned uColIndexA = 0;\r
+ unsigned uColIndexB = 0;\r
+ unsigned uColIndexCombined = 0;\r
+ const unsigned uEdgeCount = Path.GetEdgeCount();\r
+ for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)\r
+ {\r
+ const PWEdge &Edge = Path.GetEdge(uEdgeIndex);\r
+#if TRACE\r
+ Log("\nEdge %u %c%u.%u\n",\r
+ uEdgeIndex,\r
+ Edge.cType,\r
+ Edge.uPrefixLengthA,\r
+ Edge.uPrefixLengthB);\r
+#endif\r
+ const char cType = Edge.cType;\r
+ const unsigned uPrefixLengthA = Edge.uPrefixLengthA;\r
+ unsigned uColCountA = 0;\r
+ if (uPrefixLengthA > 0)\r
+ {\r
+ const unsigned uNodeIndexA = uPrefixLengthA - 1;\r
+ const unsigned uTplColIndexA = uNodeIndexA;\r
+ if (uTplColIndexA > uColIndexA)\r
+ uColCountA = uTplColIndexA - uColIndexA;\r
+ }\r
+\r
+ const unsigned uPrefixLengthB = Edge.uPrefixLengthB;\r
+ unsigned uColCountB = 0;\r
+ if (uPrefixLengthB > 0)\r
+ {\r
+ const unsigned uNodeIndexB = uPrefixLengthB - 1;\r
+ const unsigned uTplColIndexB = uNodeIndexB;\r
+ if (uTplColIndexB > uColIndexB)\r
+ uColCountB = uTplColIndexB - uColIndexB;\r
+ }\r
+\r
+// TODO: This code looks like a hangover from HMM estimation -- can we delete it?\r
+ assert(uColCountA == 0);\r
+ assert(uColCountB == 0);\r
+ AppendTplInserts(msaA, uColIndexA, uColCountA, msaB, uColIndexB, uColCountB,\r
+ uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);\r
+\r
+ switch (cType)\r
+ {\r
+ case 'M':\r
+ {\r
+ assert(uPrefixLengthA > 0);\r
+ assert(uPrefixLengthB > 0);\r
+ const unsigned uColA = uPrefixLengthA - 1;\r
+ const unsigned uColB = uPrefixLengthB - 1;\r
+ assert(uColIndexA == uColA);\r
+ assert(uColIndexB == uColB);\r
+ AppendMatch(msaA, uColIndexA, msaB, uColIndexB, uSeqCountA, uSeqCountB,\r
+ msaCombined, uColIndexCombined);\r
+ break;\r
+ }\r
+ case 'D':\r
+ {\r
+ assert(uPrefixLengthA > 0);\r
+ const unsigned uColA = uPrefixLengthA - 1;\r
+ assert(uColIndexA == uColA);\r
+ AppendDelete(msaA, uColIndexA, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);\r
+ break;\r
+ }\r
+ case 'I':\r
+ {\r
+ assert(uPrefixLengthB > 0);\r
+ const unsigned uColB = uPrefixLengthB - 1;\r
+ assert(uColIndexB == uColB);\r
+ AppendInsert(msaB, uColIndexB, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);\r
+ break;\r
+ }\r
+ default:\r
+ assert(false);\r
+ }\r
+ }\r
+ unsigned uInsertColCountA = uColCountA - uColIndexA;\r
+ unsigned uInsertColCountB = uColCountB - uColIndexB;\r
+\r
+// TODO: This code looks like a hangover from HMM estimation -- can we delete it?\r
+ assert(uInsertColCountA == 0);\r
+ assert(uInsertColCountB == 0);\r
+ AppendTplInserts(msaA, uColIndexA, uInsertColCountA, msaB, uColIndexB,\r
+ uInsertColCountB, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);\r
+\r
+ assert(msaCombined.GetColCount() == uEdgeCount);\r
+ }\r
+\r
+static const ProfPos PPStart =\r
+ {\r
+ false, //m_bAllGaps;\r
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // m_uSortOrder[21];\r
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // m_fcCounts[20];\r
+ 1.0, // m_LL;\r
+ 0.0, // m_LG;\r
+ 0.0, // m_GL;\r
+ 0.0, // m_GG;\r
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // m_ALScores\r
+ 0, // m_uResidueGroup;\r
+ 1.0, // m_fOcc;\r
+ 0.0, // m_fcStartOcc;\r
+ 0.0, // m_fcEndOcc;\r
+ 0.0, // m_scoreGapOpen;\r
+ 0.0, // m_scoreGapClose;\r
+ };\r
+\r
+// MM\r
+// Ai\961 Ai Out\r
+// X X LL LL\r
+// X - LG LG\r
+// - X GL GL\r
+// - - GG GG\r
+// \r
+// Bj\961 Bj\r
+// X X LL LL\r
+// X - LG LG\r
+// - X GL GL\r
+// - - GG GG\r
+static void SetGapsMM(\r
+ const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,\r
+ const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,\r
+ ProfPos *POut, unsigned uColIndexOut)\r
+ {\r
+ const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;\r
+ const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;\r
+ ProfPos &PPO = POut[uColIndexOut];\r
+\r
+ PPO.m_LL = wA*PPA.m_LL + wB*PPB.m_LL;\r
+ PPO.m_LG = wA*PPA.m_LG + wB*PPB.m_LG;\r
+ PPO.m_GL = wA*PPA.m_GL + wB*PPB.m_GL;\r
+ PPO.m_GG = wA*PPA.m_GG + wB*PPB.m_GG;\r
+ }\r
+\r
+// MD\r
+// Ai\961 Ai Out\r
+// X X LL LL\r
+// X - LG LG\r
+// - X GL GL\r
+// - - GG GG\r
+// \r
+// Bj (-)\r
+// X - ?L LG\r
+// - - ?G GG\r
+static void SetGapsMD(\r
+ const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,\r
+ const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,\r
+ ProfPos *POut, unsigned uColIndexOut)\r
+ {\r
+ const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;\r
+ const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;\r
+ ProfPos &PPO = POut[uColIndexOut];\r
+\r
+ PPO.m_LL = wA*PPA.m_LL;\r
+ PPO.m_LG = wA*PPA.m_LG + wB*(PPB.m_LL + PPB.m_GL);\r
+ PPO.m_GL = wA*PPA.m_GL;\r
+ PPO.m_GG = wA*PPA.m_GG + wB*(PPB.m_LG + PPB.m_GG);\r
+ }\r
+\r
+// DD\r
+// Ai\961 Ai Out\r
+// X X LL LL\r
+// X - LG LG\r
+// - X GL GL\r
+// - - GG GG\r
+// \r
+// (-) (-)\r
+// - - ?? GG\r
+static void SetGapsDD(\r
+ const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,\r
+ const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,\r
+ ProfPos *POut, unsigned uColIndexOut)\r
+ {\r
+ const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;\r
+ ProfPos &PPO = POut[uColIndexOut];\r
+\r
+ PPO.m_LL = wA*PPA.m_LL;\r
+ PPO.m_LG = wA*PPA.m_LG;\r
+ PPO.m_GL = wA*PPA.m_GL;\r
+ PPO.m_GG = wA*PPA.m_GG + wB;\r
+ }\r
+\r
+// MI\r
+// Ai (-) Out\r
+// X - ?L LG\r
+// - - ?G GG\r
+\r
+// Bj\961 Bj\r
+// X X LL LL\r
+// X - LG LG\r
+// - X GL GL\r
+// - - GG GG\r
+static void SetGapsMI(\r
+ const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,\r
+ const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,\r
+ ProfPos *POut, unsigned uColIndexOut)\r
+ {\r
+ const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;\r
+ const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;\r
+ ProfPos &PPO = POut[uColIndexOut];\r
+\r
+ PPO.m_LL = wB*PPB.m_LL;\r
+ PPO.m_LG = wB*PPB.m_LG + wA*(PPA.m_LL + PPA.m_GL);\r
+ PPO.m_GL = wB*PPB.m_GL;\r
+ PPO.m_GG = wB*PPB.m_GG + wA*(PPA.m_LG + PPA.m_GG);\r
+ }\r
+\r
+// DM\r
+// Ai\961 Ai Out\r
+// X X LL LL\r
+// X - LG LG\r
+// - X GL GL\r
+// - - GG GG\r
+// \r
+// (-) Bj \r
+// - X ?L GL\r
+// - - ?G GG\r
+static void SetGapsDM(\r
+ const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,\r
+ const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,\r
+ ProfPos *POut, unsigned uColIndexOut)\r
+ {\r
+ const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;\r
+ const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;\r
+ ProfPos &PPO = POut[uColIndexOut];\r
+\r
+ PPO.m_LL = wA*PPA.m_LL;\r
+ PPO.m_LG = wA*PPA.m_LG;\r
+ PPO.m_GL = wA*PPA.m_GL + wB*(PPB.m_LL + PPB.m_GL);\r
+ PPO.m_GG = wA*PPA.m_GG + wB*(PPB.m_LG + PPB.m_GG);\r
+ }\r
+\r
+// IM\r
+// (-) Ai Out \r
+// - X ?L GL\r
+// - - ?G GG\r
+\r
+// Bj\961 Bj\r
+// X X LL LL\r
+// X - LG LG\r
+// - X GL GL\r
+// - - GG GG\r
+static void SetGapsIM(\r
+ const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,\r
+ const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,\r
+ ProfPos *POut, unsigned uColIndexOut)\r
+ {\r
+ const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;\r
+ const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;\r
+ ProfPos &PPO = POut[uColIndexOut];\r
+\r
+ PPO.m_LL = wB*PPB.m_LL;\r
+ PPO.m_LG = wB*PPB.m_LG;\r
+ PPO.m_GL = wB*PPB.m_GL + wA*(PPA.m_LL + PPA.m_GL);\r
+ PPO.m_GG = wB*PPB.m_GG + wA*(PPA.m_LG + PPA.m_GG);\r
+ }\r
+\r
+// ID\r
+// (-) Ai Out\r
+// - X ?L GL\r
+// - - ?G GG\r
+\r
+// Bj (-)\r
+// X - ?L LG\r
+// - - ?G GG\r
+static void SetGapsID(\r
+ const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,\r
+ const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,\r
+ ProfPos *POut, unsigned uColIndexOut)\r
+ {\r
+ const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;\r
+ const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;\r
+ ProfPos &PPO = POut[uColIndexOut];\r
+\r
+ PPO.m_LL = 0;\r
+ PPO.m_LG = wB*PPB.m_GL + wB*PPB.m_LL;\r
+ PPO.m_GL = wA*PPA.m_GL + wA*PPA.m_LL;\r
+ PPO.m_GG = wA*(PPA.m_LG + PPA.m_GG) + wB*(PPB.m_LG + PPB.m_GG);\r
+ }\r
+\r
+// DI\r
+// Ai (-) Out\r
+// X - ?L LG\r
+// - - ?G GG\r
+\r
+// (-) Bj\r
+// - X ?L GL\r
+// - - ?G GG\r
+static void SetGapsDI(\r
+ const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,\r
+ const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,\r
+ ProfPos *POut, unsigned uColIndexOut)\r
+ {\r
+ const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;\r
+ const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;\r
+ ProfPos &PPO = POut[uColIndexOut];\r
+\r
+ PPO.m_LL = 0;\r
+ PPO.m_LG = wA*PPA.m_GL + wA*PPA.m_LL;\r
+ PPO.m_GL = wB*PPB.m_GL + wB*PPB.m_LL;\r
+ PPO.m_GG = wA*(PPA.m_LG + PPA.m_GG) + wB*(PPB.m_LG + PPB.m_GG);\r
+ }\r
+\r
+// II\r
+// (-) (-) Out\r
+// - - ?? GG\r
+\r
+// Bj\961 Bj\r
+// X X LL LL\r
+// X - LG LG\r
+// - X GL GL\r
+// - - GG GG\r
+static void SetGapsII(\r
+ const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,\r
+ const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,\r
+ ProfPos *POut, unsigned uColIndexOut)\r
+ {\r
+ const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;\r
+ ProfPos &PPO = POut[uColIndexOut];\r
+\r
+ PPO.m_LL = wB*PPB.m_LL;\r
+ PPO.m_LG = wB*PPB.m_LG;\r
+ PPO.m_GL = wB*PPB.m_GL;\r
+ PPO.m_GG = wB*PPB.m_GG + wA;\r
+ }\r
+\r
+static void SetFreqs(\r
+ const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,\r
+ const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,\r
+ ProfPos *POut, unsigned uColIndexOut)\r
+ {\r
+ const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;\r
+ const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;\r
+ ProfPos &PPO = POut[uColIndexOut];\r
+\r
+ if (g_bNormalizeCounts)\r
+ {\r
+ const FCOUNT fA = PPA.m_fOcc*wA/(wA + wB);\r
+ const FCOUNT fB = PPB.m_fOcc*wB/(wA + wB);\r
+ FCOUNT fTotal = 0;\r
+ for (unsigned i = 0; i < 20; ++i)\r
+ {\r
+ const FCOUNT f = fA*PPA.m_fcCounts[i] + fB*PPB.m_fcCounts[i];\r
+ PPO.m_fcCounts[i] = f;\r
+ fTotal += f;\r
+ }\r
+ if (fTotal > 0)\r
+ for (unsigned i = 0; i < 20; ++i)\r
+ PPO.m_fcCounts[i] /= fTotal;\r
+ }\r
+ else\r
+ {\r
+ for (unsigned i = 0; i < 20; ++i)\r
+ PPO.m_fcCounts[i] = wA*PPA.m_fcCounts[i] + wB*PPB.m_fcCounts[i];\r
+ }\r
+ }\r
+\r
+void AlignTwoProfsGivenPath(const PWPath &Path,\r
+ const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,\r
+ const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,\r
+ ProfPos **ptrPOut, unsigned *ptruLengthOut)\r
+ {\r
+#if TRACE\r
+ Log("AlignTwoProfsGivenPath wA=%.3g wB=%.3g Path=\n", wA, wB);\r
+ Path.LogMe();\r
+#endif\r
+ assert(BTEq(wA + wB, 1.0));\r
+\r
+ unsigned uColIndexA = 0;\r
+ unsigned uColIndexB = 0;\r
+ unsigned uColIndexOut = 0;\r
+ const unsigned uEdgeCount = Path.GetEdgeCount();\r
+ ProfPos *POut = new ProfPos[uEdgeCount];\r
+ char cPrevType = 'M';\r
+ for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)\r
+ {\r
+ const PWEdge &Edge = Path.GetEdge(uEdgeIndex);\r
+ const char cType = Edge.cType;\r
+\r
+ const unsigned uPrefixLengthA = Edge.uPrefixLengthA;\r
+ const unsigned uPrefixLengthB = Edge.uPrefixLengthB;\r
+\r
+#if TRACE\r
+ Log("\nEdge %u %c%u.%u ColA=%u ColB=%u\n",\r
+ uEdgeIndex,\r
+ Edge.cType,\r
+ Edge.uPrefixLengthA,\r
+ Edge.uPrefixLengthB,\r
+ uColIndexA,\r
+ uColIndexB);\r
+#endif\r
+\r
+ POut[uColIndexOut].m_bAllGaps = false;\r
+ switch (cType)\r
+ {\r
+ case 'M':\r
+ {\r
+ assert(uPrefixLengthA > 0);\r
+ assert(uPrefixLengthB > 0);\r
+ SetFreqs(\r
+ PA, uPrefixLengthA, wA,\r
+ PB, uPrefixLengthB, wB,\r
+ POut, uColIndexOut);\r
+ switch (cPrevType)\r
+ {\r
+ case 'M':\r
+ SetGapsMM(\r
+ PA, uPrefixLengthA, wA,\r
+ PB, uPrefixLengthB, wB,\r
+ POut, uColIndexOut);\r
+ break;\r
+ case 'D':\r
+ SetGapsDM(\r
+ PA, uPrefixLengthA, wA,\r
+ PB, uPrefixLengthB, wB,\r
+ POut, uColIndexOut);\r
+ break;\r
+ case 'I':\r
+ SetGapsIM(\r
+ PA, uPrefixLengthA, wA,\r
+ PB, uPrefixLengthB, wB,\r
+ POut, uColIndexOut);\r
+ break;\r
+ default:\r
+ Quit("Bad cPrevType");\r
+ }\r
+ ++uColIndexA;\r
+ ++uColIndexB;\r
+ ++uColIndexOut;\r
+ break;\r
+ }\r
+ case 'D':\r
+ {\r
+ assert(uPrefixLengthA > 0);\r
+ SetFreqs(\r
+ PA, uPrefixLengthA, wA,\r
+ PB, uPrefixLengthB, 0,\r
+ POut, uColIndexOut);\r
+ switch (cPrevType)\r
+ {\r
+ case 'M':\r
+ SetGapsMD(\r
+ PA, uPrefixLengthA, wA,\r
+ PB, uPrefixLengthB, wB,\r
+ POut, uColIndexOut);\r
+ break;\r
+ case 'D':\r
+ SetGapsDD(\r
+ PA, uPrefixLengthA, wA,\r
+ PB, uPrefixLengthB, wB,\r
+ POut, uColIndexOut);\r
+ break;\r
+ case 'I':\r
+ SetGapsID(\r
+ PA, uPrefixLengthA, wA,\r
+ PB, uPrefixLengthB, wB,\r
+ POut, uColIndexOut);\r
+ break;\r
+ default:\r
+ Quit("Bad cPrevType");\r
+ }\r
+ ++uColIndexA;\r
+ ++uColIndexOut;\r
+ break;\r
+ }\r
+ case 'I':\r
+ {\r
+ assert(uPrefixLengthB > 0);\r
+ SetFreqs(\r
+ PA, uPrefixLengthA, 0,\r
+ PB, uPrefixLengthB, wB,\r
+ POut, uColIndexOut);\r
+ switch (cPrevType)\r
+ {\r
+ case 'M':\r
+ SetGapsMI(\r
+ PA, uPrefixLengthA, wA,\r
+ PB, uPrefixLengthB, wB,\r
+ POut, uColIndexOut);\r
+ break;\r
+ case 'D':\r
+ SetGapsDI(\r
+ PA, uPrefixLengthA, wA,\r
+ PB, uPrefixLengthB, wB,\r
+ POut, uColIndexOut);\r
+ break;\r
+ case 'I':\r
+ SetGapsII(\r
+ PA, uPrefixLengthA, wA,\r
+ PB, uPrefixLengthB, wB,\r
+ POut, uColIndexOut);\r
+ break;\r
+ default:\r
+ Quit("Bad cPrevType");\r
+ }\r
+ ++uColIndexB;\r
+ ++uColIndexOut;\r
+ break;\r
+ }\r
+ default:\r
+ assert(false);\r
+ }\r
+ cPrevType = cType;\r
+ }\r
+ assert(uColIndexOut == uEdgeCount);\r
+\r
+ ProfScoresFromFreqs(POut, uEdgeCount);\r
+ ValidateProf(POut, uEdgeCount);\r
+\r
+ *ptrPOut = POut;\r
+ *ptruLengthOut = uEdgeCount;\r
+\r
+#if TRACE\r
+ Log("AlignTwoProfsGivenPath:\n");\r
+ ListProfile(POut, uEdgeCount, 0);\r
+#endif\r
+ }\r