+++ /dev/null
-#include "muscle.h"\r
-#include <math.h>\r
-#include <stdio.h> // for sprintf\r
-#include "pwpath.h"\r
-#include "profile.h"\r
-#include "gapscoredimer.h"\r
-\r
-#define TRACE 0\r
-\r
-static SCORE TraceBackDimer( const SCORE *DPM_, const SCORE *DPD_, const SCORE *DPI_,\r
- const char *TBM_, const char *TBD_, const char *TBI_,\r
- unsigned uLengthA, unsigned uLengthB, PWPath &Path);\r
-\r
-static const char *LocalScoreToStr(SCORE s)\r
- {\r
- static char str[16];\r
- if (MINUS_INFINITY == s)\r
- return " *";\r
- sprintf(str, "%6.3g", s);\r
- return str;\r
- }\r
-\r
-#if TRACE\r
-static void ListDP(const SCORE *DPM_, const ProfPos *PA, const ProfPos *PB,\r
- unsigned uPrefixCountA, unsigned uPrefixCountB)\r
- {\r
- Log(" ");\r
- for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)\r
- {\r
- char c = ' ';\r
- if (uPrefixLengthB > 0)\r
- c = ConsensusChar(PB[uPrefixLengthB - 1]);\r
- Log(" %4u:%c", uPrefixLengthB, c);\r
- }\r
- Log("\n");\r
- for (unsigned uPrefixLengthA = 0; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)\r
- {\r
- char c = ' ';\r
- if (uPrefixLengthA > 0)\r
- c = ConsensusChar(PA[uPrefixLengthA - 1]);\r
- Log("%4u:%c ", uPrefixLengthA, c);\r
- for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)\r
- Log(" %s", LocalScoreToStr(DPM(uPrefixLengthA, uPrefixLengthB)));\r
- Log("\n");\r
- }\r
- }\r
-\r
-static void ListTB(const char *TBM_, const ProfPos *PA, const ProfPos *PB,\r
- unsigned uPrefixCountA, unsigned uPrefixCountB)\r
- {\r
- Log(" ");\r
- for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)\r
- Log("%2d", uPrefixLengthB);\r
- Log("\n");\r
- Log(" ");\r
- for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)\r
- {\r
- char c = ' ';\r
- if (uPrefixLengthB > 0)\r
- c = ConsensusChar(PB[uPrefixLengthB - 1]);\r
- Log(" %c", c);\r
- }\r
- Log("\n");\r
- for (unsigned uPrefixLengthA = 0; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)\r
- {\r
- char c = ' ';\r
- if (uPrefixLengthA > 0)\r
- c = ConsensusChar(PA[uPrefixLengthA - 1]);\r
- Log("%4u:%c ", uPrefixLengthA, c);\r
- for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)\r
- Log(" %c", TBM(uPrefixLengthA, uPrefixLengthB));\r
- Log("\n");\r
- }\r
- }\r
-#endif // TRACE\r
-\r
-static ProfPos PPTerm;\r
-static bool InitializePPTerm()\r
- {\r
- PPTerm.m_bAllGaps = false;\r
- PPTerm.m_LL = 1;\r
- PPTerm.m_LG = 0;\r
- PPTerm.m_GL = 0;\r
- PPTerm.m_GG = 0;\r
- PPTerm.m_fOcc = 1;\r
- return true;\r
- }\r
-static bool PPTermInitialized = InitializePPTerm();\r
-\r
-static SCORE ScoreProfPosDimerLE(const ProfPos &PPA, const ProfPos &PPB)\r
- {\r
- SCORE Score = 0;\r
- for (unsigned n = 0; n < 20; ++n)\r
- {\r
- const unsigned uLetter = PPA.m_uSortOrder[n];\r
- const FCOUNT fcLetter = PPA.m_fcCounts[uLetter];\r
- if (0 == fcLetter)\r
- break;\r
- Score += fcLetter*PPB.m_AAScores[uLetter];\r
- }\r
- if (0 == Score)\r
- return -2.5;\r
- SCORE logScore = logf(Score);\r
- return (SCORE) (logScore*(PPA.m_fOcc * PPB.m_fOcc));\r
- }\r
-\r
-static SCORE ScoreProfPosDimerPSP(const ProfPos &PPA, const ProfPos &PPB)\r
- {\r
- SCORE Score = 0;\r
- for (unsigned n = 0; n < 20; ++n)\r
- {\r
- const unsigned uLetter = PPA.m_uSortOrder[n];\r
- const FCOUNT fcLetter = PPA.m_fcCounts[uLetter];\r
- if (0 == fcLetter)\r
- break;\r
- Score += fcLetter*PPB.m_AAScores[uLetter];\r
- }\r
- return Score;\r
- }\r
-\r
-static SCORE ScoreProfPosDimer(const ProfPos &PPA, const ProfPos &PPB)\r
- {\r
- switch (g_PPScore)\r
- {\r
- case PPSCORE_LE:\r
- return ScoreProfPosDimerLE(PPA, PPB);\r
-\r
- case PPSCORE_SP:\r
- case PPSCORE_SV:\r
- return ScoreProfPosDimerPSP(PPA, PPB);\r
- }\r
- Quit("Invalid g_PPScore");\r
- return 0;\r
- }\r
-\r
-// Global alignment dynamic programming\r
-// This variant optimizes the profile-profile SP score under the\r
-// dimer approximation.\r
-SCORE GlobalAlignDimer(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,\r
- unsigned uLengthB, PWPath &Path)\r
- {\r
- assert(uLengthB > 0 && uLengthA > 0);\r
-\r
- const unsigned uPrefixCountA = uLengthA + 1;\r
- const unsigned uPrefixCountB = uLengthB + 1;\r
-\r
-// Allocate DP matrices\r
- const size_t LM = uPrefixCountA*uPrefixCountB;\r
- SCORE *DPM_ = new SCORE[LM];\r
- SCORE *DPD_ = new SCORE[LM];\r
- SCORE *DPI_ = new SCORE[LM];\r
-\r
- char *TBM_ = new char[LM];\r
- char *TBD_ = new char[LM];\r
- char *TBI_ = new char[LM];\r
-\r
- DPM(0, 0) = 0;\r
- DPD(0, 0) = MINUS_INFINITY;\r
- DPI(0, 0) = MINUS_INFINITY;\r
-\r
- TBM(0, 0) = 'S';\r
- TBD(0, 0) = '?';\r
- TBI(0, 0) = '?';\r
-\r
- DPM(1, 0) = MINUS_INFINITY;\r
- DPD(1, 0) = GapScoreMD(PA[0], PPTerm);\r
- DPI(1, 0) = MINUS_INFINITY;\r
-\r
- TBM(1, 0) = '?';\r
- TBD(1, 0) = 'S';\r
- TBI(1, 0) = '?';\r
-\r
- DPM(0, 1) = MINUS_INFINITY;\r
- DPD(0, 1) = MINUS_INFINITY;\r
- DPI(0, 1) = GapScoreMI(PPTerm, PB[0]);\r
-\r
- TBM(0, 1) = '?';\r
- TBD(0, 1) = '?';\r
- TBI(0, 1) = 'S';\r
-\r
-// Empty prefix of B is special case\r
- for (unsigned uPrefixLengthA = 2; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)\r
- {\r
- // M=LetterA+LetterB, impossible with empty prefix\r
- DPM(uPrefixLengthA, 0) = MINUS_INFINITY;\r
- TBM(uPrefixLengthA, 0) = '?';\r
-\r
- // D=LetterA+GapB\r
- DPD(uPrefixLengthA, 0) = DPD(uPrefixLengthA - 1, 0) +\r
- GapScoreDD(PA[uPrefixLengthA - 1], PPTerm);\r
- TBD(uPrefixLengthA, 0) = 'D';\r
-\r
- // I=GapA+LetterB, impossible with empty prefix\r
- DPI(uPrefixLengthA, 0) = MINUS_INFINITY;\r
- TBI(uPrefixLengthA, 0) = '?';\r
- }\r
-\r
-// Empty prefix of A is special case\r
- for (unsigned uPrefixLengthB = 2; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)\r
- {\r
- // M=LetterA+LetterB, impossible with empty prefix\r
- DPM(0, uPrefixLengthB) = MINUS_INFINITY;\r
- TBM(0, uPrefixLengthB) = '?';\r
-\r
- // D=LetterA+GapB, impossible with empty prefix\r
- DPD(0, uPrefixLengthB) = MINUS_INFINITY;\r
- TBD(0, uPrefixLengthB) = '?';\r
-\r
- // I=GapA+LetterB\r
- DPI(0, uPrefixLengthB) = DPI(0, uPrefixLengthB - 1) +\r
- GapScoreII(PPTerm, PB[uPrefixLengthB - 1]);\r
- TBI(0, uPrefixLengthB) = 'I';\r
- }\r
-\r
-// ============\r
-// Main DP loop\r
-// ============\r
- for (unsigned uPrefixLengthB = 1; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)\r
- {\r
- const ProfPos &PPB = PB[uPrefixLengthB - 1];\r
- for (unsigned uPrefixLengthA = 1; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)\r
- {\r
- const ProfPos &PPA = PA[uPrefixLengthA - 1];\r
- {\r
- // Match M=LetterA+LetterB\r
- SCORE scoreLL = ScoreProfPosDimer(PPA, PPB);\r
-\r
- SCORE scoreMM = DPM(uPrefixLengthA-1, uPrefixLengthB-1) + GapScoreMM(PPA, PPB);\r
- SCORE scoreDM = DPD(uPrefixLengthA-1, uPrefixLengthB-1) + GapScoreDM(PPA, PPB);\r
- SCORE scoreIM = DPI(uPrefixLengthA-1, uPrefixLengthB-1) + GapScoreIM(PPA, PPB);\r
-\r
- SCORE scoreBest = scoreMM;\r
- char c = 'M';\r
- if (scoreDM > scoreBest)\r
- {\r
- scoreBest = scoreDM;\r
- c = 'D';\r
- }\r
- if (scoreIM > scoreBest)\r
- {\r
- scoreBest = scoreIM;\r
- c = 'I';\r
- }\r
-\r
- DPM(uPrefixLengthA, uPrefixLengthB) = scoreBest + scoreLL;\r
- TBM(uPrefixLengthA, uPrefixLengthB) = c;\r
- }\r
- {\r
- // Delete D=LetterA+GapB\r
- SCORE scoreMD = DPM(uPrefixLengthA-1, uPrefixLengthB) + GapScoreMD(PPA, PPB);\r
- SCORE scoreDD = DPD(uPrefixLengthA-1, uPrefixLengthB) + GapScoreDD(PPA, PPB);\r
- SCORE scoreID = DPI(uPrefixLengthA-1, uPrefixLengthB) + GapScoreID(PPA, PPB);\r
-\r
- SCORE scoreBest = scoreMD;\r
- char c = 'M';\r
- if (scoreDD > scoreBest)\r
- {\r
- scoreBest = scoreDD;\r
- c = 'D';\r
- }\r
- if (scoreID > scoreBest)\r
- {\r
- scoreBest = scoreID;\r
- c = 'I';\r
- }\r
-\r
- DPD(uPrefixLengthA, uPrefixLengthB) = scoreBest;\r
- TBD(uPrefixLengthA, uPrefixLengthB) = c;\r
- }\r
- {\r
- // Insert I=GapA+LetterB\r
- SCORE scoreMI = DPM(uPrefixLengthA, uPrefixLengthB-1) + GapScoreMI(PPA, PPB);\r
- SCORE scoreDI = DPD(uPrefixLengthA, uPrefixLengthB-1) + GapScoreDI(PPA, PPB);\r
- SCORE scoreII = DPI(uPrefixLengthA, uPrefixLengthB-1) + GapScoreII(PPA, PPB);\r
-\r
- SCORE scoreBest = scoreMI;\r
- char c = 'M';\r
- if (scoreDI > scoreBest)\r
- {\r
- scoreBest = scoreDI;\r
- c = 'D';\r
- }\r
- if (scoreII > scoreBest)\r
- {\r
- scoreBest = scoreII;\r
- c = 'I';\r
- }\r
-\r
- DPI(uPrefixLengthA, uPrefixLengthB) = scoreBest;\r
- TBI(uPrefixLengthA, uPrefixLengthB) = c;\r
- }\r
- }\r
- }\r
-\r
-#if TRACE\r
- Log("DPM:\n");\r
- ListDP(DPM_, PA, PB, uPrefixCountA, uPrefixCountB);\r
- Log("DPD:\n");\r
- ListDP(DPD_, PA, PB, uPrefixCountA, uPrefixCountB);\r
- Log("DPI:\n");\r
- ListDP(DPI_, PA, PB, uPrefixCountA, uPrefixCountB);\r
- Log("TBM:\n");\r
- ListTB(TBM_, PA, PB, uPrefixCountA, uPrefixCountB);\r
- Log("TBD:\n");\r
- ListTB(TBD_, PA, PB, uPrefixCountA, uPrefixCountB);\r
- Log("TBI:\n");\r
- ListTB(TBI_, PA, PB, uPrefixCountA, uPrefixCountB);\r
-#endif\r
-\r
- SCORE Score = TraceBackDimer(DPM_, DPD_, DPI_, TBM_, TBD_, TBI_,\r
- uLengthA, uLengthB, Path);\r
-\r
-#if TRACE\r
- Log("GlobalAlignDimer score = %.3g\n", Score);\r
-#endif\r
-\r
- delete[] DPM_;\r
- delete[] DPD_;\r
- delete[] DPI_;\r
-\r
- delete[] TBM_;\r
- delete[] TBD_;\r
- delete[] TBI_;\r
-\r
- return Score;\r
- }\r
-\r
-static SCORE TraceBackDimer( const SCORE *DPM_, const SCORE *DPD_, const SCORE *DPI_,\r
- const char *TBM_, const char *TBD_, const char *TBI_,\r
- unsigned uLengthA, unsigned uLengthB, PWPath &Path)\r
- {\r
- const unsigned uPrefixCountA = uLengthA + 1;\r
-\r
- unsigned uPrefixLengthA = uLengthA;\r
- unsigned uPrefixLengthB = uLengthB;\r
-\r
- char cEdge = 'M';\r
- SCORE scoreMax = DPM(uLengthA, uLengthB);\r
- if (DPD(uLengthA, uLengthB) > scoreMax)\r
- {\r
- scoreMax = DPD(uLengthA, uLengthB);\r
- cEdge = 'D';\r
- }\r
- if (DPI(uLengthA, uLengthB) > scoreMax)\r
- {\r
- scoreMax = DPI(uLengthA, uLengthB);\r
- cEdge = 'I';\r
- }\r
-\r
- for (;;)\r
- {\r
- if (0 == uPrefixLengthA && 0 == uPrefixLengthB)\r
- break;\r
-\r
- PWEdge Edge;\r
- Edge.cType = cEdge;\r
- Edge.uPrefixLengthA = uPrefixLengthA;\r
- Edge.uPrefixLengthB = uPrefixLengthB;\r
- Path.PrependEdge(Edge);\r
-\r
-#if TRACE\r
- Log("PLA=%u PLB=%u Edge=%c\n", uPrefixLengthA, uPrefixLengthB, cEdge);\r
-#endif\r
- switch (cEdge)\r
- {\r
- case 'M':\r
- assert(uPrefixLengthA > 0 && uPrefixLengthB > 0);\r
- cEdge = TBM(uPrefixLengthA, uPrefixLengthB);\r
- --uPrefixLengthA;\r
- --uPrefixLengthB;\r
- break;\r
- case 'D':\r
- assert(uPrefixLengthA > 0);\r
- cEdge = TBD(uPrefixLengthA, uPrefixLengthB);\r
- --uPrefixLengthA;\r
- break;\r
- case 'I':\r
- assert(uPrefixLengthB > 0);\r
- cEdge = TBI(uPrefixLengthA, uPrefixLengthB);\r
- --uPrefixLengthB;\r
- break;\r
- default:\r
- Quit("Invalid edge PLA=%u PLB=%u %c", uPrefixLengthA, uPrefixLengthB, cEdge);\r
- }\r
- }\r
-#if TRACE\r
- Path.LogMe();\r
-#endif\r
- return scoreMax;\r
- }\r