--- /dev/null
+#include "muscle.h"\r
+#include <math.h>\r
+#include <stdio.h> // for sprintf\r
+#include "pwpath.h"\r
+#include "profile.h"\r
+#include "gapscoredimer.h"\r
+\r
+#define TRACE 0\r
+\r
+static SCORE TraceBackDimer( const SCORE *DPM_, const SCORE *DPD_, const SCORE *DPI_,\r
+ const char *TBM_, const char *TBD_, const char *TBI_,\r
+ unsigned uLengthA, unsigned uLengthB, PWPath &Path);\r
+\r
+static const char *LocalScoreToStr(SCORE s)\r
+ {\r
+ static char str[16];\r
+ if (MINUS_INFINITY == s)\r
+ return " *";\r
+ sprintf(str, "%6.3g", s);\r
+ return str;\r
+ }\r
+\r
+#if TRACE\r
+static void ListDP(const SCORE *DPM_, const ProfPos *PA, const ProfPos *PB,\r
+ unsigned uPrefixCountA, unsigned uPrefixCountB)\r
+ {\r
+ Log(" ");\r
+ for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)\r
+ {\r
+ char c = ' ';\r
+ if (uPrefixLengthB > 0)\r
+ c = ConsensusChar(PB[uPrefixLengthB - 1]);\r
+ Log(" %4u:%c", uPrefixLengthB, c);\r
+ }\r
+ Log("\n");\r
+ for (unsigned uPrefixLengthA = 0; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)\r
+ {\r
+ char c = ' ';\r
+ if (uPrefixLengthA > 0)\r
+ c = ConsensusChar(PA[uPrefixLengthA - 1]);\r
+ Log("%4u:%c ", uPrefixLengthA, c);\r
+ for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)\r
+ Log(" %s", LocalScoreToStr(DPM(uPrefixLengthA, uPrefixLengthB)));\r
+ Log("\n");\r
+ }\r
+ }\r
+\r
+static void ListTB(const char *TBM_, const ProfPos *PA, const ProfPos *PB,\r
+ unsigned uPrefixCountA, unsigned uPrefixCountB)\r
+ {\r
+ Log(" ");\r
+ for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)\r
+ Log("%2d", uPrefixLengthB);\r
+ Log("\n");\r
+ Log(" ");\r
+ for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)\r
+ {\r
+ char c = ' ';\r
+ if (uPrefixLengthB > 0)\r
+ c = ConsensusChar(PB[uPrefixLengthB - 1]);\r
+ Log(" %c", c);\r
+ }\r
+ Log("\n");\r
+ for (unsigned uPrefixLengthA = 0; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)\r
+ {\r
+ char c = ' ';\r
+ if (uPrefixLengthA > 0)\r
+ c = ConsensusChar(PA[uPrefixLengthA - 1]);\r
+ Log("%4u:%c ", uPrefixLengthA, c);\r
+ for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)\r
+ Log(" %c", TBM(uPrefixLengthA, uPrefixLengthB));\r
+ Log("\n");\r
+ }\r
+ }\r
+#endif // TRACE\r
+\r
+static ProfPos PPTerm;\r
+static bool InitializePPTerm()\r
+ {\r
+ PPTerm.m_bAllGaps = false;\r
+ PPTerm.m_LL = 1;\r
+ PPTerm.m_LG = 0;\r
+ PPTerm.m_GL = 0;\r
+ PPTerm.m_GG = 0;\r
+ PPTerm.m_fOcc = 1;\r
+ return true;\r
+ }\r
+static bool PPTermInitialized = InitializePPTerm();\r
+\r
+static SCORE ScoreProfPosDimerLE(const ProfPos &PPA, const ProfPos &PPB)\r
+ {\r
+ SCORE Score = 0;\r
+ for (unsigned n = 0; n < 20; ++n)\r
+ {\r
+ const unsigned uLetter = PPA.m_uSortOrder[n];\r
+ const FCOUNT fcLetter = PPA.m_fcCounts[uLetter];\r
+ if (0 == fcLetter)\r
+ break;\r
+ Score += fcLetter*PPB.m_AAScores[uLetter];\r
+ }\r
+ if (0 == Score)\r
+ return -2.5;\r
+ SCORE logScore = logf(Score);\r
+ return (SCORE) (logScore*(PPA.m_fOcc * PPB.m_fOcc));\r
+ }\r
+\r
+static SCORE ScoreProfPosDimerPSP(const ProfPos &PPA, const ProfPos &PPB)\r
+ {\r
+ SCORE Score = 0;\r
+ for (unsigned n = 0; n < 20; ++n)\r
+ {\r
+ const unsigned uLetter = PPA.m_uSortOrder[n];\r
+ const FCOUNT fcLetter = PPA.m_fcCounts[uLetter];\r
+ if (0 == fcLetter)\r
+ break;\r
+ Score += fcLetter*PPB.m_AAScores[uLetter];\r
+ }\r
+ return Score;\r
+ }\r
+\r
+static SCORE ScoreProfPosDimer(const ProfPos &PPA, const ProfPos &PPB)\r
+ {\r
+ switch (g_PPScore)\r
+ {\r
+ case PPSCORE_LE:\r
+ return ScoreProfPosDimerLE(PPA, PPB);\r
+\r
+ case PPSCORE_SP:\r
+ case PPSCORE_SV:\r
+ return ScoreProfPosDimerPSP(PPA, PPB);\r
+ }\r
+ Quit("Invalid g_PPScore");\r
+ return 0;\r
+ }\r
+\r
+// Global alignment dynamic programming\r
+// This variant optimizes the profile-profile SP score under the\r
+// dimer approximation.\r
+SCORE GlobalAlignDimer(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,\r
+ unsigned uLengthB, PWPath &Path)\r
+ {\r
+ assert(uLengthB > 0 && uLengthA > 0);\r
+\r
+ const unsigned uPrefixCountA = uLengthA + 1;\r
+ const unsigned uPrefixCountB = uLengthB + 1;\r
+\r
+// Allocate DP matrices\r
+ const size_t LM = uPrefixCountA*uPrefixCountB;\r
+ SCORE *DPM_ = new SCORE[LM];\r
+ SCORE *DPD_ = new SCORE[LM];\r
+ SCORE *DPI_ = new SCORE[LM];\r
+\r
+ char *TBM_ = new char[LM];\r
+ char *TBD_ = new char[LM];\r
+ char *TBI_ = new char[LM];\r
+\r
+ DPM(0, 0) = 0;\r
+ DPD(0, 0) = MINUS_INFINITY;\r
+ DPI(0, 0) = MINUS_INFINITY;\r
+\r
+ TBM(0, 0) = 'S';\r
+ TBD(0, 0) = '?';\r
+ TBI(0, 0) = '?';\r
+\r
+ DPM(1, 0) = MINUS_INFINITY;\r
+ DPD(1, 0) = GapScoreMD(PA[0], PPTerm);\r
+ DPI(1, 0) = MINUS_INFINITY;\r
+\r
+ TBM(1, 0) = '?';\r
+ TBD(1, 0) = 'S';\r
+ TBI(1, 0) = '?';\r
+\r
+ DPM(0, 1) = MINUS_INFINITY;\r
+ DPD(0, 1) = MINUS_INFINITY;\r
+ DPI(0, 1) = GapScoreMI(PPTerm, PB[0]);\r
+\r
+ TBM(0, 1) = '?';\r
+ TBD(0, 1) = '?';\r
+ TBI(0, 1) = 'S';\r
+\r
+// Empty prefix of B is special case\r
+ for (unsigned uPrefixLengthA = 2; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)\r
+ {\r
+ // M=LetterA+LetterB, impossible with empty prefix\r
+ DPM(uPrefixLengthA, 0) = MINUS_INFINITY;\r
+ TBM(uPrefixLengthA, 0) = '?';\r
+\r
+ // D=LetterA+GapB\r
+ DPD(uPrefixLengthA, 0) = DPD(uPrefixLengthA - 1, 0) +\r
+ GapScoreDD(PA[uPrefixLengthA - 1], PPTerm);\r
+ TBD(uPrefixLengthA, 0) = 'D';\r
+\r
+ // I=GapA+LetterB, impossible with empty prefix\r
+ DPI(uPrefixLengthA, 0) = MINUS_INFINITY;\r
+ TBI(uPrefixLengthA, 0) = '?';\r
+ }\r
+\r
+// Empty prefix of A is special case\r
+ for (unsigned uPrefixLengthB = 2; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)\r
+ {\r
+ // M=LetterA+LetterB, impossible with empty prefix\r
+ DPM(0, uPrefixLengthB) = MINUS_INFINITY;\r
+ TBM(0, uPrefixLengthB) = '?';\r
+\r
+ // D=LetterA+GapB, impossible with empty prefix\r
+ DPD(0, uPrefixLengthB) = MINUS_INFINITY;\r
+ TBD(0, uPrefixLengthB) = '?';\r
+\r
+ // I=GapA+LetterB\r
+ DPI(0, uPrefixLengthB) = DPI(0, uPrefixLengthB - 1) +\r
+ GapScoreII(PPTerm, PB[uPrefixLengthB - 1]);\r
+ TBI(0, uPrefixLengthB) = 'I';\r
+ }\r
+\r
+// ============\r
+// Main DP loop\r
+// ============\r
+ for (unsigned uPrefixLengthB = 1; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)\r
+ {\r
+ const ProfPos &PPB = PB[uPrefixLengthB - 1];\r
+ for (unsigned uPrefixLengthA = 1; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)\r
+ {\r
+ const ProfPos &PPA = PA[uPrefixLengthA - 1];\r
+ {\r
+ // Match M=LetterA+LetterB\r
+ SCORE scoreLL = ScoreProfPosDimer(PPA, PPB);\r
+\r
+ SCORE scoreMM = DPM(uPrefixLengthA-1, uPrefixLengthB-1) + GapScoreMM(PPA, PPB);\r
+ SCORE scoreDM = DPD(uPrefixLengthA-1, uPrefixLengthB-1) + GapScoreDM(PPA, PPB);\r
+ SCORE scoreIM = DPI(uPrefixLengthA-1, uPrefixLengthB-1) + GapScoreIM(PPA, PPB);\r
+\r
+ SCORE scoreBest = scoreMM;\r
+ char c = 'M';\r
+ if (scoreDM > scoreBest)\r
+ {\r
+ scoreBest = scoreDM;\r
+ c = 'D';\r
+ }\r
+ if (scoreIM > scoreBest)\r
+ {\r
+ scoreBest = scoreIM;\r
+ c = 'I';\r
+ }\r
+\r
+ DPM(uPrefixLengthA, uPrefixLengthB) = scoreBest + scoreLL;\r
+ TBM(uPrefixLengthA, uPrefixLengthB) = c;\r
+ }\r
+ {\r
+ // Delete D=LetterA+GapB\r
+ SCORE scoreMD = DPM(uPrefixLengthA-1, uPrefixLengthB) + GapScoreMD(PPA, PPB);\r
+ SCORE scoreDD = DPD(uPrefixLengthA-1, uPrefixLengthB) + GapScoreDD(PPA, PPB);\r
+ SCORE scoreID = DPI(uPrefixLengthA-1, uPrefixLengthB) + GapScoreID(PPA, PPB);\r
+\r
+ SCORE scoreBest = scoreMD;\r
+ char c = 'M';\r
+ if (scoreDD > scoreBest)\r
+ {\r
+ scoreBest = scoreDD;\r
+ c = 'D';\r
+ }\r
+ if (scoreID > scoreBest)\r
+ {\r
+ scoreBest = scoreID;\r
+ c = 'I';\r
+ }\r
+\r
+ DPD(uPrefixLengthA, uPrefixLengthB) = scoreBest;\r
+ TBD(uPrefixLengthA, uPrefixLengthB) = c;\r
+ }\r
+ {\r
+ // Insert I=GapA+LetterB\r
+ SCORE scoreMI = DPM(uPrefixLengthA, uPrefixLengthB-1) + GapScoreMI(PPA, PPB);\r
+ SCORE scoreDI = DPD(uPrefixLengthA, uPrefixLengthB-1) + GapScoreDI(PPA, PPB);\r
+ SCORE scoreII = DPI(uPrefixLengthA, uPrefixLengthB-1) + GapScoreII(PPA, PPB);\r
+\r
+ SCORE scoreBest = scoreMI;\r
+ char c = 'M';\r
+ if (scoreDI > scoreBest)\r
+ {\r
+ scoreBest = scoreDI;\r
+ c = 'D';\r
+ }\r
+ if (scoreII > scoreBest)\r
+ {\r
+ scoreBest = scoreII;\r
+ c = 'I';\r
+ }\r
+\r
+ DPI(uPrefixLengthA, uPrefixLengthB) = scoreBest;\r
+ TBI(uPrefixLengthA, uPrefixLengthB) = c;\r
+ }\r
+ }\r
+ }\r
+\r
+#if TRACE\r
+ Log("DPM:\n");\r
+ ListDP(DPM_, PA, PB, uPrefixCountA, uPrefixCountB);\r
+ Log("DPD:\n");\r
+ ListDP(DPD_, PA, PB, uPrefixCountA, uPrefixCountB);\r
+ Log("DPI:\n");\r
+ ListDP(DPI_, PA, PB, uPrefixCountA, uPrefixCountB);\r
+ Log("TBM:\n");\r
+ ListTB(TBM_, PA, PB, uPrefixCountA, uPrefixCountB);\r
+ Log("TBD:\n");\r
+ ListTB(TBD_, PA, PB, uPrefixCountA, uPrefixCountB);\r
+ Log("TBI:\n");\r
+ ListTB(TBI_, PA, PB, uPrefixCountA, uPrefixCountB);\r
+#endif\r
+\r
+ SCORE Score = TraceBackDimer(DPM_, DPD_, DPI_, TBM_, TBD_, TBI_,\r
+ uLengthA, uLengthB, Path);\r
+\r
+#if TRACE\r
+ Log("GlobalAlignDimer score = %.3g\n", Score);\r
+#endif\r
+\r
+ delete[] DPM_;\r
+ delete[] DPD_;\r
+ delete[] DPI_;\r
+\r
+ delete[] TBM_;\r
+ delete[] TBD_;\r
+ delete[] TBI_;\r
+\r
+ return Score;\r
+ }\r
+\r
+static SCORE TraceBackDimer( const SCORE *DPM_, const SCORE *DPD_, const SCORE *DPI_,\r
+ const char *TBM_, const char *TBD_, const char *TBI_,\r
+ unsigned uLengthA, unsigned uLengthB, PWPath &Path)\r
+ {\r
+ const unsigned uPrefixCountA = uLengthA + 1;\r
+\r
+ unsigned uPrefixLengthA = uLengthA;\r
+ unsigned uPrefixLengthB = uLengthB;\r
+\r
+ char cEdge = 'M';\r
+ SCORE scoreMax = DPM(uLengthA, uLengthB);\r
+ if (DPD(uLengthA, uLengthB) > scoreMax)\r
+ {\r
+ scoreMax = DPD(uLengthA, uLengthB);\r
+ cEdge = 'D';\r
+ }\r
+ if (DPI(uLengthA, uLengthB) > scoreMax)\r
+ {\r
+ scoreMax = DPI(uLengthA, uLengthB);\r
+ cEdge = 'I';\r
+ }\r
+\r
+ for (;;)\r
+ {\r
+ if (0 == uPrefixLengthA && 0 == uPrefixLengthB)\r
+ break;\r
+\r
+ PWEdge Edge;\r
+ Edge.cType = cEdge;\r
+ Edge.uPrefixLengthA = uPrefixLengthA;\r
+ Edge.uPrefixLengthB = uPrefixLengthB;\r
+ Path.PrependEdge(Edge);\r
+\r
+#if TRACE\r
+ Log("PLA=%u PLB=%u Edge=%c\n", uPrefixLengthA, uPrefixLengthB, cEdge);\r
+#endif\r
+ switch (cEdge)\r
+ {\r
+ case 'M':\r
+ assert(uPrefixLengthA > 0 && uPrefixLengthB > 0);\r
+ cEdge = TBM(uPrefixLengthA, uPrefixLengthB);\r
+ --uPrefixLengthA;\r
+ --uPrefixLengthB;\r
+ break;\r
+ case 'D':\r
+ assert(uPrefixLengthA > 0);\r
+ cEdge = TBD(uPrefixLengthA, uPrefixLengthB);\r
+ --uPrefixLengthA;\r
+ break;\r
+ case 'I':\r
+ assert(uPrefixLengthB > 0);\r
+ cEdge = TBI(uPrefixLengthA, uPrefixLengthB);\r
+ --uPrefixLengthB;\r
+ break;\r
+ default:\r
+ Quit("Invalid edge PLA=%u PLB=%u %c", uPrefixLengthA, uPrefixLengthB, cEdge);\r
+ }\r
+ }\r
+#if TRACE\r
+ Path.LogMe();\r
+#endif\r
+ return scoreMax;\r
+ }\r