+++ /dev/null
-#include "muscle.h"\r
-#include "msa.h"\r
-#include "pwpath.h"\r
-#include "profile.h"\r
-\r
-#define TRACE 0\r
-\r
-static void LogPP(const ProfPos &PP)\r
- {\r
- Log("ResidueGroup %u\n", PP.m_uResidueGroup);\r
- Log("AllGaps %d\n", PP.m_bAllGaps);\r
- Log("Occ %.3g\n", PP.m_fOcc);\r
- Log("LL=%.3g LG=%.3g GL=%.3g GG=%.3g\n", PP.m_LL, PP.m_LG, PP.m_GL, PP.m_GG);\r
- Log("Freqs ");\r
- for (unsigned i = 0; i < 20; ++i)\r
- if (PP.m_fcCounts[i] > 0)\r
- Log("%c=%.3g ", LetterToChar(i), PP.m_fcCounts[i]);\r
- Log("\n");\r
- }\r
-\r
-static void AssertProfPosEq(const ProfPos *PA, const ProfPos *PB, unsigned i)\r
- {\r
- const ProfPos &PPA = PA[i];\r
- const ProfPos &PPB = PB[i];\r
-#define eq(x) if (PPA.m_##x != PPB.m_##x) { LogPP(PPA); LogPP(PPB); Quit("AssertProfPosEq." #x); }\r
-#define be(x) if (!BTEq(PPA.m_##x, PPB.m_##x)) { LogPP(PPA); LogPP(PPB); Quit("AssertProfPosEq." #x); }\r
- eq(bAllGaps)\r
- eq(uResidueGroup)\r
-\r
- be(LL)\r
- be(LG)\r
- be(GL)\r
- be(GG)\r
- be(fOcc)\r
- be(scoreGapOpen)\r
- be(scoreGapClose)\r
-\r
- for (unsigned j = 0; j < 20; ++j)\r
- {\r
-#define eqj(x) if (PPA.m_##x != PPB.m_##x) Quit("AssertProfPosEq j=%u " #x, j);\r
-#define bej(x) if (!BTEq(PPA.m_##x, PPB.m_##x)) Quit("AssertProfPosEq j=%u " #x, j);\r
- bej(fcCounts[j]);\r
-// eqj(uSortOrder[j]) // may differ due to ties, don't check?\r
- bej(AAScores[j])\r
-#undef eqj\r
-#undef bej\r
- }\r
-#undef eq\r
-#undef be\r
- }\r
-\r
-void AssertProfsEq(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,\r
- unsigned uLengthB)\r
- {\r
- if (uLengthA != uLengthB)\r
- Quit("AssertProfsEq: lengths differ %u %u", uLengthA, uLengthB);\r
- for (unsigned i = 0; i < uLengthB; ++i)\r
- AssertProfPosEq(PA, PB, i);\r
- }\r
-\r
-#if DEBUG\r
-static void ValidateProf(const ProfPos *Prof, unsigned uLength)\r
- {\r
- for (unsigned i = 0; i < uLength; ++i)\r
- {\r
- const ProfPos &PP = Prof[i];\r
-\r
- FCOUNT s1 = PP.m_LL + PP.m_LG + PP.m_GL + PP.m_GG;\r
- assert(BTEq(s1, 1.0));\r
-\r
- if (i > 0)\r
- {\r
- const ProfPos &PPPrev = Prof[i-1];\r
- FCOUNT s2 = PPPrev.m_LL + PPPrev.m_GL;\r
- FCOUNT s3 = PP.m_LL + PP.m_LG;\r
- assert(BTEq(s2, s3));\r
- }\r
- if (i < uLength - 1)\r
- {\r
- const ProfPos &PPNext = Prof[i+1];\r
- FCOUNT s4 = PP.m_LL + PP.m_GL;\r
- FCOUNT s5 = PPNext.m_LL + PPNext.m_LG;\r
- assert(BTEq(s4, s5));\r
- }\r
- }\r
- }\r
-#else\r
-#define ValidateProf(Prof, Length) /* empty */\r
-#endif\r
-\r
-static void ScoresFromFreqsPos(ProfPos *Prof, unsigned uLength, unsigned uPos)\r
- {\r
- ProfPos &PP = Prof[uPos];\r
- SortCounts(PP.m_fcCounts, PP.m_uSortOrder);\r
- PP.m_uResidueGroup = ResidueGroupFromFCounts(PP.m_fcCounts);\r
-\r
-// "Occupancy"\r
- PP.m_fOcc = PP.m_LL + PP.m_GL;\r
-\r
-// Frequency of gap-opens in this position (i)\r
-// Gap open = letter in i-1 and gap in i\r
-// = iff LG in i\r
- FCOUNT fcOpen = PP.m_LG;\r
-\r
-// Frequency of gap-closes in this position\r
-// Gap close = gap in i and letter in i+1\r
-// = iff GL in i+1\r
- FCOUNT fcClose;\r
- if (uPos + 1 < uLength)\r
- fcClose = Prof[uPos + 1].m_GL;\r
- else\r
- fcClose = PP.m_GG + PP.m_LG;\r
-\r
- PP.m_scoreGapOpen = (SCORE) ((1.0 - fcOpen)*g_scoreGapOpen/2.0);\r
- PP.m_scoreGapClose = (SCORE) ((1.0 - fcClose)*g_scoreGapOpen/2.0);\r
-#if DOUBLE_AFFINE\r
- PP.m_scoreGapOpen2 = (SCORE) ((1.0 - fcOpen)*g_scoreGapOpen2/2.0);\r
- PP.m_scoreGapClose2 = (SCORE) ((1.0 - fcClose)*g_scoreGapOpen2/2.0);\r
-#endif\r
-\r
- for (unsigned i = 0; i < g_AlphaSize; ++i)\r
- {\r
- SCORE scoreSum = 0;\r
- for (unsigned j = 0; j < g_AlphaSize; ++j)\r
- scoreSum += PP.m_fcCounts[j]*(*g_ptrScoreMatrix)[i][j];\r
- PP.m_AAScores[i] = scoreSum;\r
- }\r
- }\r
-\r
-void ProfScoresFromFreqs(ProfPos *Prof, unsigned uLength)\r
- {\r
- for (unsigned i = 0; i < uLength; ++i)\r
- ScoresFromFreqsPos(Prof, uLength, i);\r
- }\r
-\r
-static void AppendDelete(const MSA &msaA, unsigned &uColIndexA,\r
- unsigned uSeqCountA, unsigned uSeqCountB, MSA &msaCombined,\r
- unsigned &uColIndexCombined)\r
- {\r
-#if TRACE\r
- Log("AppendDelete ColIxA=%u ColIxCmb=%u\n",\r
- uColIndexA, uColIndexCombined);\r
-#endif\r
- for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)\r
- {\r
- char c = msaA.GetChar(uSeqIndexA, uColIndexA);\r
- msaCombined.SetChar(uSeqIndexA, uColIndexCombined, c);\r
- }\r
-\r
- for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)\r
- msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, '-');\r
-\r
- ++uColIndexCombined;\r
- ++uColIndexA;\r
- }\r
-\r
-static void AppendInsert(const MSA &msaB, unsigned &uColIndexB,\r
- unsigned uSeqCountA, unsigned uSeqCountB, MSA &msaCombined,\r
- unsigned &uColIndexCombined)\r
- {\r
-#if TRACE\r
- Log("AppendInsert ColIxB=%u ColIxCmb=%u\n",\r
- uColIndexB, uColIndexCombined);\r
-#endif\r
- for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)\r
- msaCombined.SetChar(uSeqIndexA, uColIndexCombined, '-');\r
-\r
- for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)\r
- {\r
- char c = msaB.GetChar(uSeqIndexB, uColIndexB);\r
- msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, c);\r
- }\r
-\r
- ++uColIndexCombined;\r
- ++uColIndexB;\r
- }\r
-\r
-static void AppendTplInserts(const MSA &msaA, unsigned &uColIndexA, unsigned uColCountA,\r
- const MSA &msaB, unsigned &uColIndexB, unsigned uColCountB, unsigned uSeqCountA,\r
- unsigned uSeqCountB, MSA &msaCombined, unsigned &uColIndexCombined)\r
- {\r
-#if TRACE\r
- Log("AppendTplInserts ColIxA=%u ColIxB=%u ColIxCmb=%u\n",\r
- uColIndexA, uColIndexB, uColIndexCombined);\r
-#endif\r
- const unsigned uLengthA = msaA.GetColCount();\r
- const unsigned uLengthB = msaB.GetColCount();\r
-\r
- unsigned uNewColCount = uColCountA;\r
- if (uColCountB > uNewColCount)\r
- uNewColCount = uColCountB;\r
-\r
- for (unsigned n = 0; n < uColCountA; ++n)\r
- {\r
- for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)\r
- {\r
- char c = msaA.GetChar(uSeqIndexA, uColIndexA + n);\r
- c = UnalignChar(c);\r
- msaCombined.SetChar(uSeqIndexA, uColIndexCombined + n, c);\r
- }\r
- }\r
- for (unsigned n = uColCountA; n < uNewColCount; ++n)\r
- {\r
- for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)\r
- msaCombined.SetChar(uSeqIndexA, uColIndexCombined + n, '.');\r
- }\r
-\r
- for (unsigned n = 0; n < uColCountB; ++n)\r
- {\r
- for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)\r
- {\r
- char c = msaB.GetChar(uSeqIndexB, uColIndexB + n);\r
- c = UnalignChar(c);\r
- msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined + n, c);\r
- }\r
- }\r
- for (unsigned n = uColCountB; n < uNewColCount; ++n)\r
- {\r
- for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)\r
- msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined + n, '.');\r
- }\r
-\r
- uColIndexCombined += uNewColCount;\r
- uColIndexA += uColCountA;\r
- uColIndexB += uColCountB;\r
- }\r
-\r
-static void AppendMatch(const MSA &msaA, unsigned &uColIndexA, const MSA &msaB,\r
- unsigned &uColIndexB, unsigned uSeqCountA, unsigned uSeqCountB,\r
- MSA &msaCombined, unsigned &uColIndexCombined)\r
- {\r
-#if TRACE\r
- Log("AppendMatch ColIxA=%u ColIxB=%u ColIxCmb=%u\n",\r
- uColIndexA, uColIndexB, uColIndexCombined);\r
-#endif\r
-\r
- for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)\r
- {\r
- char c = msaA.GetChar(uSeqIndexA, uColIndexA);\r
- msaCombined.SetChar(uSeqIndexA, uColIndexCombined, c);\r
- }\r
-\r
- for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)\r
- {\r
- char c = msaB.GetChar(uSeqIndexB, uColIndexB);\r
- msaCombined.SetChar(uSeqCountA + uSeqIndexB, uColIndexCombined, c);\r
- }\r
-\r
- ++uColIndexA;\r
- ++uColIndexB;\r
- ++uColIndexCombined;\r
- }\r
-\r
-void AlignTwoMSAsGivenPath(const PWPath &Path, const MSA &msaA, const MSA &msaB,\r
- MSA &msaCombined)\r
- {\r
- msaCombined.Clear();\r
-\r
-#if TRACE\r
- Log("FastAlignProfiles\n");\r
- Log("Template A:\n");\r
- msaA.LogMe();\r
- Log("Template B:\n");\r
- msaB.LogMe();\r
-#endif\r
-\r
- const unsigned uColCountA = msaA.GetColCount();\r
- const unsigned uColCountB = msaB.GetColCount();\r
-\r
- const unsigned uSeqCountA = msaA.GetSeqCount();\r
- const unsigned uSeqCountB = msaB.GetSeqCount();\r
-\r
- msaCombined.SetSeqCount(uSeqCountA + uSeqCountB);\r
-\r
-// Copy sequence names into combined MSA\r
- for (unsigned uSeqIndexA = 0; uSeqIndexA < uSeqCountA; ++uSeqIndexA)\r
- {\r
- msaCombined.SetSeqName(uSeqIndexA, msaA.GetSeqName(uSeqIndexA));\r
- msaCombined.SetSeqId(uSeqIndexA, msaA.GetSeqId(uSeqIndexA));\r
- }\r
-\r
- for (unsigned uSeqIndexB = 0; uSeqIndexB < uSeqCountB; ++uSeqIndexB)\r
- {\r
- msaCombined.SetSeqName(uSeqCountA + uSeqIndexB, msaB.GetSeqName(uSeqIndexB));\r
- msaCombined.SetSeqId(uSeqCountA + uSeqIndexB, msaB.GetSeqId(uSeqIndexB));\r
- }\r
-\r
- unsigned uColIndexA = 0;\r
- unsigned uColIndexB = 0;\r
- unsigned uColIndexCombined = 0;\r
- const unsigned uEdgeCount = Path.GetEdgeCount();\r
- for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)\r
- {\r
- const PWEdge &Edge = Path.GetEdge(uEdgeIndex);\r
-#if TRACE\r
- Log("\nEdge %u %c%u.%u\n",\r
- uEdgeIndex,\r
- Edge.cType,\r
- Edge.uPrefixLengthA,\r
- Edge.uPrefixLengthB);\r
-#endif\r
- const char cType = Edge.cType;\r
- const unsigned uPrefixLengthA = Edge.uPrefixLengthA;\r
- unsigned uColCountA = 0;\r
- if (uPrefixLengthA > 0)\r
- {\r
- const unsigned uNodeIndexA = uPrefixLengthA - 1;\r
- const unsigned uTplColIndexA = uNodeIndexA;\r
- if (uTplColIndexA > uColIndexA)\r
- uColCountA = uTplColIndexA - uColIndexA;\r
- }\r
-\r
- const unsigned uPrefixLengthB = Edge.uPrefixLengthB;\r
- unsigned uColCountB = 0;\r
- if (uPrefixLengthB > 0)\r
- {\r
- const unsigned uNodeIndexB = uPrefixLengthB - 1;\r
- const unsigned uTplColIndexB = uNodeIndexB;\r
- if (uTplColIndexB > uColIndexB)\r
- uColCountB = uTplColIndexB - uColIndexB;\r
- }\r
-\r
-// TODO: This code looks like a hangover from HMM estimation -- can we delete it?\r
- assert(uColCountA == 0);\r
- assert(uColCountB == 0);\r
- AppendTplInserts(msaA, uColIndexA, uColCountA, msaB, uColIndexB, uColCountB,\r
- uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);\r
-\r
- switch (cType)\r
- {\r
- case 'M':\r
- {\r
- assert(uPrefixLengthA > 0);\r
- assert(uPrefixLengthB > 0);\r
- const unsigned uColA = uPrefixLengthA - 1;\r
- const unsigned uColB = uPrefixLengthB - 1;\r
- assert(uColIndexA == uColA);\r
- assert(uColIndexB == uColB);\r
- AppendMatch(msaA, uColIndexA, msaB, uColIndexB, uSeqCountA, uSeqCountB,\r
- msaCombined, uColIndexCombined);\r
- break;\r
- }\r
- case 'D':\r
- {\r
- assert(uPrefixLengthA > 0);\r
- const unsigned uColA = uPrefixLengthA - 1;\r
- assert(uColIndexA == uColA);\r
- AppendDelete(msaA, uColIndexA, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);\r
- break;\r
- }\r
- case 'I':\r
- {\r
- assert(uPrefixLengthB > 0);\r
- const unsigned uColB = uPrefixLengthB - 1;\r
- assert(uColIndexB == uColB);\r
- AppendInsert(msaB, uColIndexB, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);\r
- break;\r
- }\r
- default:\r
- assert(false);\r
- }\r
- }\r
- unsigned uInsertColCountA = uColCountA - uColIndexA;\r
- unsigned uInsertColCountB = uColCountB - uColIndexB;\r
-\r
-// TODO: This code looks like a hangover from HMM estimation -- can we delete it?\r
- assert(uInsertColCountA == 0);\r
- assert(uInsertColCountB == 0);\r
- AppendTplInserts(msaA, uColIndexA, uInsertColCountA, msaB, uColIndexB,\r
- uInsertColCountB, uSeqCountA, uSeqCountB, msaCombined, uColIndexCombined);\r
-\r
- assert(msaCombined.GetColCount() == uEdgeCount);\r
- }\r
-\r
-static const ProfPos PPStart =\r
- {\r
- false, //m_bAllGaps;\r
- { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // m_uSortOrder[21];\r
- { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // m_fcCounts[20];\r
- 1.0, // m_LL;\r
- 0.0, // m_LG;\r
- 0.0, // m_GL;\r
- 0.0, // m_GG;\r
- { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, // m_ALScores\r
- 0, // m_uResidueGroup;\r
- 1.0, // m_fOcc;\r
- 0.0, // m_fcStartOcc;\r
- 0.0, // m_fcEndOcc;\r
- 0.0, // m_scoreGapOpen;\r
- 0.0, // m_scoreGapClose;\r
- };\r
-\r
-// MM\r
-// Ai\961 Ai Out\r
-// X X LL LL\r
-// X - LG LG\r
-// - X GL GL\r
-// - - GG GG\r
-// \r
-// Bj\961 Bj\r
-// X X LL LL\r
-// X - LG LG\r
-// - X GL GL\r
-// - - GG GG\r
-static void SetGapsMM(\r
- const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,\r
- const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,\r
- ProfPos *POut, unsigned uColIndexOut)\r
- {\r
- const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;\r
- const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;\r
- ProfPos &PPO = POut[uColIndexOut];\r
-\r
- PPO.m_LL = wA*PPA.m_LL + wB*PPB.m_LL;\r
- PPO.m_LG = wA*PPA.m_LG + wB*PPB.m_LG;\r
- PPO.m_GL = wA*PPA.m_GL + wB*PPB.m_GL;\r
- PPO.m_GG = wA*PPA.m_GG + wB*PPB.m_GG;\r
- }\r
-\r
-// MD\r
-// Ai\961 Ai Out\r
-// X X LL LL\r
-// X - LG LG\r
-// - X GL GL\r
-// - - GG GG\r
-// \r
-// Bj (-)\r
-// X - ?L LG\r
-// - - ?G GG\r
-static void SetGapsMD(\r
- const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,\r
- const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,\r
- ProfPos *POut, unsigned uColIndexOut)\r
- {\r
- const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;\r
- const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;\r
- ProfPos &PPO = POut[uColIndexOut];\r
-\r
- PPO.m_LL = wA*PPA.m_LL;\r
- PPO.m_LG = wA*PPA.m_LG + wB*(PPB.m_LL + PPB.m_GL);\r
- PPO.m_GL = wA*PPA.m_GL;\r
- PPO.m_GG = wA*PPA.m_GG + wB*(PPB.m_LG + PPB.m_GG);\r
- }\r
-\r
-// DD\r
-// Ai\961 Ai Out\r
-// X X LL LL\r
-// X - LG LG\r
-// - X GL GL\r
-// - - GG GG\r
-// \r
-// (-) (-)\r
-// - - ?? GG\r
-static void SetGapsDD(\r
- const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,\r
- const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,\r
- ProfPos *POut, unsigned uColIndexOut)\r
- {\r
- const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;\r
- ProfPos &PPO = POut[uColIndexOut];\r
-\r
- PPO.m_LL = wA*PPA.m_LL;\r
- PPO.m_LG = wA*PPA.m_LG;\r
- PPO.m_GL = wA*PPA.m_GL;\r
- PPO.m_GG = wA*PPA.m_GG + wB;\r
- }\r
-\r
-// MI\r
-// Ai (-) Out\r
-// X - ?L LG\r
-// - - ?G GG\r
-\r
-// Bj\961 Bj\r
-// X X LL LL\r
-// X - LG LG\r
-// - X GL GL\r
-// - - GG GG\r
-static void SetGapsMI(\r
- const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,\r
- const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,\r
- ProfPos *POut, unsigned uColIndexOut)\r
- {\r
- const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;\r
- const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;\r
- ProfPos &PPO = POut[uColIndexOut];\r
-\r
- PPO.m_LL = wB*PPB.m_LL;\r
- PPO.m_LG = wB*PPB.m_LG + wA*(PPA.m_LL + PPA.m_GL);\r
- PPO.m_GL = wB*PPB.m_GL;\r
- PPO.m_GG = wB*PPB.m_GG + wA*(PPA.m_LG + PPA.m_GG);\r
- }\r
-\r
-// DM\r
-// Ai\961 Ai Out\r
-// X X LL LL\r
-// X - LG LG\r
-// - X GL GL\r
-// - - GG GG\r
-// \r
-// (-) Bj \r
-// - X ?L GL\r
-// - - ?G GG\r
-static void SetGapsDM(\r
- const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,\r
- const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,\r
- ProfPos *POut, unsigned uColIndexOut)\r
- {\r
- const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;\r
- const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;\r
- ProfPos &PPO = POut[uColIndexOut];\r
-\r
- PPO.m_LL = wA*PPA.m_LL;\r
- PPO.m_LG = wA*PPA.m_LG;\r
- PPO.m_GL = wA*PPA.m_GL + wB*(PPB.m_LL + PPB.m_GL);\r
- PPO.m_GG = wA*PPA.m_GG + wB*(PPB.m_LG + PPB.m_GG);\r
- }\r
-\r
-// IM\r
-// (-) Ai Out \r
-// - X ?L GL\r
-// - - ?G GG\r
-\r
-// Bj\961 Bj\r
-// X X LL LL\r
-// X - LG LG\r
-// - X GL GL\r
-// - - GG GG\r
-static void SetGapsIM(\r
- const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,\r
- const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,\r
- ProfPos *POut, unsigned uColIndexOut)\r
- {\r
- const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;\r
- const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;\r
- ProfPos &PPO = POut[uColIndexOut];\r
-\r
- PPO.m_LL = wB*PPB.m_LL;\r
- PPO.m_LG = wB*PPB.m_LG;\r
- PPO.m_GL = wB*PPB.m_GL + wA*(PPA.m_LL + PPA.m_GL);\r
- PPO.m_GG = wB*PPB.m_GG + wA*(PPA.m_LG + PPA.m_GG);\r
- }\r
-\r
-// ID\r
-// (-) Ai Out\r
-// - X ?L GL\r
-// - - ?G GG\r
-\r
-// Bj (-)\r
-// X - ?L LG\r
-// - - ?G GG\r
-static void SetGapsID(\r
- const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,\r
- const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,\r
- ProfPos *POut, unsigned uColIndexOut)\r
- {\r
- const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;\r
- const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;\r
- ProfPos &PPO = POut[uColIndexOut];\r
-\r
- PPO.m_LL = 0;\r
- PPO.m_LG = wB*PPB.m_GL + wB*PPB.m_LL;\r
- PPO.m_GL = wA*PPA.m_GL + wA*PPA.m_LL;\r
- PPO.m_GG = wA*(PPA.m_LG + PPA.m_GG) + wB*(PPB.m_LG + PPB.m_GG);\r
- }\r
-\r
-// DI\r
-// Ai (-) Out\r
-// X - ?L LG\r
-// - - ?G GG\r
-\r
-// (-) Bj\r
-// - X ?L GL\r
-// - - ?G GG\r
-static void SetGapsDI(\r
- const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,\r
- const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,\r
- ProfPos *POut, unsigned uColIndexOut)\r
- {\r
- const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;\r
- const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;\r
- ProfPos &PPO = POut[uColIndexOut];\r
-\r
- PPO.m_LL = 0;\r
- PPO.m_LG = wA*PPA.m_GL + wA*PPA.m_LL;\r
- PPO.m_GL = wB*PPB.m_GL + wB*PPB.m_LL;\r
- PPO.m_GG = wA*(PPA.m_LG + PPA.m_GG) + wB*(PPB.m_LG + PPB.m_GG);\r
- }\r
-\r
-// II\r
-// (-) (-) Out\r
-// - - ?? GG\r
-\r
-// Bj\961 Bj\r
-// X X LL LL\r
-// X - LG LG\r
-// - X GL GL\r
-// - - GG GG\r
-static void SetGapsII(\r
- const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,\r
- const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,\r
- ProfPos *POut, unsigned uColIndexOut)\r
- {\r
- const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;\r
- ProfPos &PPO = POut[uColIndexOut];\r
-\r
- PPO.m_LL = wB*PPB.m_LL;\r
- PPO.m_LG = wB*PPB.m_LG;\r
- PPO.m_GL = wB*PPB.m_GL;\r
- PPO.m_GG = wB*PPB.m_GG + wA;\r
- }\r
-\r
-static void SetFreqs(\r
- const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,\r
- const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,\r
- ProfPos *POut, unsigned uColIndexOut)\r
- {\r
- const ProfPos &PPA = uPrefixLengthA > 0 ? PA[uPrefixLengthA-1] : PPStart;\r
- const ProfPos &PPB = uPrefixLengthB > 0 ? PB[uPrefixLengthB-1] : PPStart;\r
- ProfPos &PPO = POut[uColIndexOut];\r
-\r
- if (g_bNormalizeCounts)\r
- {\r
- const FCOUNT fA = PPA.m_fOcc*wA/(wA + wB);\r
- const FCOUNT fB = PPB.m_fOcc*wB/(wA + wB);\r
- FCOUNT fTotal = 0;\r
- for (unsigned i = 0; i < 20; ++i)\r
- {\r
- const FCOUNT f = fA*PPA.m_fcCounts[i] + fB*PPB.m_fcCounts[i];\r
- PPO.m_fcCounts[i] = f;\r
- fTotal += f;\r
- }\r
- if (fTotal > 0)\r
- for (unsigned i = 0; i < 20; ++i)\r
- PPO.m_fcCounts[i] /= fTotal;\r
- }\r
- else\r
- {\r
- for (unsigned i = 0; i < 20; ++i)\r
- PPO.m_fcCounts[i] = wA*PPA.m_fcCounts[i] + wB*PPB.m_fcCounts[i];\r
- }\r
- }\r
-\r
-void AlignTwoProfsGivenPath(const PWPath &Path,\r
- const ProfPos *PA, unsigned uPrefixLengthA, WEIGHT wA,\r
- const ProfPos *PB, unsigned uPrefixLengthB, WEIGHT wB,\r
- ProfPos **ptrPOut, unsigned *ptruLengthOut)\r
- {\r
-#if TRACE\r
- Log("AlignTwoProfsGivenPath wA=%.3g wB=%.3g Path=\n", wA, wB);\r
- Path.LogMe();\r
-#endif\r
- assert(BTEq(wA + wB, 1.0));\r
-\r
- unsigned uColIndexA = 0;\r
- unsigned uColIndexB = 0;\r
- unsigned uColIndexOut = 0;\r
- const unsigned uEdgeCount = Path.GetEdgeCount();\r
- ProfPos *POut = new ProfPos[uEdgeCount];\r
- char cPrevType = 'M';\r
- for (unsigned uEdgeIndex = 0; uEdgeIndex < uEdgeCount; ++uEdgeIndex)\r
- {\r
- const PWEdge &Edge = Path.GetEdge(uEdgeIndex);\r
- const char cType = Edge.cType;\r
-\r
- const unsigned uPrefixLengthA = Edge.uPrefixLengthA;\r
- const unsigned uPrefixLengthB = Edge.uPrefixLengthB;\r
-\r
-#if TRACE\r
- Log("\nEdge %u %c%u.%u ColA=%u ColB=%u\n",\r
- uEdgeIndex,\r
- Edge.cType,\r
- Edge.uPrefixLengthA,\r
- Edge.uPrefixLengthB,\r
- uColIndexA,\r
- uColIndexB);\r
-#endif\r
-\r
- POut[uColIndexOut].m_bAllGaps = false;\r
- switch (cType)\r
- {\r
- case 'M':\r
- {\r
- assert(uPrefixLengthA > 0);\r
- assert(uPrefixLengthB > 0);\r
- SetFreqs(\r
- PA, uPrefixLengthA, wA,\r
- PB, uPrefixLengthB, wB,\r
- POut, uColIndexOut);\r
- switch (cPrevType)\r
- {\r
- case 'M':\r
- SetGapsMM(\r
- PA, uPrefixLengthA, wA,\r
- PB, uPrefixLengthB, wB,\r
- POut, uColIndexOut);\r
- break;\r
- case 'D':\r
- SetGapsDM(\r
- PA, uPrefixLengthA, wA,\r
- PB, uPrefixLengthB, wB,\r
- POut, uColIndexOut);\r
- break;\r
- case 'I':\r
- SetGapsIM(\r
- PA, uPrefixLengthA, wA,\r
- PB, uPrefixLengthB, wB,\r
- POut, uColIndexOut);\r
- break;\r
- default:\r
- Quit("Bad cPrevType");\r
- }\r
- ++uColIndexA;\r
- ++uColIndexB;\r
- ++uColIndexOut;\r
- break;\r
- }\r
- case 'D':\r
- {\r
- assert(uPrefixLengthA > 0);\r
- SetFreqs(\r
- PA, uPrefixLengthA, wA,\r
- PB, uPrefixLengthB, 0,\r
- POut, uColIndexOut);\r
- switch (cPrevType)\r
- {\r
- case 'M':\r
- SetGapsMD(\r
- PA, uPrefixLengthA, wA,\r
- PB, uPrefixLengthB, wB,\r
- POut, uColIndexOut);\r
- break;\r
- case 'D':\r
- SetGapsDD(\r
- PA, uPrefixLengthA, wA,\r
- PB, uPrefixLengthB, wB,\r
- POut, uColIndexOut);\r
- break;\r
- case 'I':\r
- SetGapsID(\r
- PA, uPrefixLengthA, wA,\r
- PB, uPrefixLengthB, wB,\r
- POut, uColIndexOut);\r
- break;\r
- default:\r
- Quit("Bad cPrevType");\r
- }\r
- ++uColIndexA;\r
- ++uColIndexOut;\r
- break;\r
- }\r
- case 'I':\r
- {\r
- assert(uPrefixLengthB > 0);\r
- SetFreqs(\r
- PA, uPrefixLengthA, 0,\r
- PB, uPrefixLengthB, wB,\r
- POut, uColIndexOut);\r
- switch (cPrevType)\r
- {\r
- case 'M':\r
- SetGapsMI(\r
- PA, uPrefixLengthA, wA,\r
- PB, uPrefixLengthB, wB,\r
- POut, uColIndexOut);\r
- break;\r
- case 'D':\r
- SetGapsDI(\r
- PA, uPrefixLengthA, wA,\r
- PB, uPrefixLengthB, wB,\r
- POut, uColIndexOut);\r
- break;\r
- case 'I':\r
- SetGapsII(\r
- PA, uPrefixLengthA, wA,\r
- PB, uPrefixLengthB, wB,\r
- POut, uColIndexOut);\r
- break;\r
- default:\r
- Quit("Bad cPrevType");\r
- }\r
- ++uColIndexB;\r
- ++uColIndexOut;\r
- break;\r
- }\r
- default:\r
- assert(false);\r
- }\r
- cPrevType = cType;\r
- }\r
- assert(uColIndexOut == uEdgeCount);\r
-\r
- ProfScoresFromFreqs(POut, uEdgeCount);\r
- ValidateProf(POut, uEdgeCount);\r
-\r
- *ptrPOut = POut;\r
- *ptruLengthOut = uEdgeCount;\r
-\r
-#if TRACE\r
- Log("AlignTwoProfsGivenPath:\n");\r
- ListProfile(POut, uEdgeCount, 0);\r
-#endif\r
- }\r