Mac binaries
[jabaws.git] / website / archive / binaries / mac / src / muscle / glbalndimer.cpp
diff --git a/website/archive/binaries/mac/src/muscle/glbalndimer.cpp b/website/archive/binaries/mac/src/muscle/glbalndimer.cpp
new file mode 100644 (file)
index 0000000..8ef3a6c
--- /dev/null
@@ -0,0 +1,390 @@
+#include "muscle.h"\r
+#include <math.h>\r
+#include <stdio.h>     // for sprintf\r
+#include "pwpath.h"\r
+#include "profile.h"\r
+#include "gapscoredimer.h"\r
+\r
+#define        TRACE   0\r
+\r
+static SCORE TraceBackDimer(  const SCORE *DPM_, const SCORE *DPD_, const SCORE *DPI_,\r
+  const char *TBM_, const char *TBD_, const char *TBI_,\r
+  unsigned uLengthA, unsigned uLengthB, PWPath &Path);\r
+\r
+static const char *LocalScoreToStr(SCORE s)\r
+       {\r
+       static char str[16];\r
+       if (MINUS_INFINITY == s)\r
+               return "     *";\r
+       sprintf(str, "%6.3g", s);\r
+       return str;\r
+       }\r
+\r
+#if    TRACE\r
+static void ListDP(const SCORE *DPM_, const ProfPos *PA, const ProfPos *PB,\r
+  unsigned uPrefixCountA, unsigned uPrefixCountB)\r
+       {\r
+       Log("        ");\r
+       for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)\r
+               {\r
+               char c = ' ';\r
+               if (uPrefixLengthB > 0)\r
+                       c = ConsensusChar(PB[uPrefixLengthB - 1]);\r
+               Log(" %4u:%c", uPrefixLengthB, c);\r
+               }\r
+       Log("\n");\r
+       for (unsigned uPrefixLengthA = 0; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)\r
+               {\r
+               char c = ' ';\r
+               if (uPrefixLengthA > 0)\r
+                       c = ConsensusChar(PA[uPrefixLengthA - 1]);\r
+               Log("%4u:%c  ", uPrefixLengthA, c);\r
+               for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)\r
+                       Log(" %s", LocalScoreToStr(DPM(uPrefixLengthA, uPrefixLengthB)));\r
+               Log("\n");\r
+               }\r
+       }\r
+\r
+static void ListTB(const char *TBM_, const ProfPos *PA, const ProfPos *PB,\r
+  unsigned uPrefixCountA, unsigned uPrefixCountB)\r
+       {\r
+       Log("        ");\r
+       for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)\r
+               Log("%2d", uPrefixLengthB);\r
+       Log("\n");\r
+       Log("        ");\r
+       for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)\r
+               {\r
+               char c = ' ';\r
+               if (uPrefixLengthB > 0)\r
+                       c = ConsensusChar(PB[uPrefixLengthB - 1]);\r
+               Log(" %c", c);\r
+               }\r
+       Log("\n");\r
+       for (unsigned uPrefixLengthA = 0; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)\r
+               {\r
+               char c = ' ';\r
+               if (uPrefixLengthA > 0)\r
+                       c = ConsensusChar(PA[uPrefixLengthA - 1]);\r
+               Log("%4u:%c  ", uPrefixLengthA, c);\r
+               for (unsigned uPrefixLengthB = 0; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)\r
+                       Log(" %c", TBM(uPrefixLengthA, uPrefixLengthB));\r
+               Log("\n");\r
+               }\r
+       }\r
+#endif // TRACE\r
+\r
+static ProfPos PPTerm;\r
+static bool InitializePPTerm()\r
+       {\r
+       PPTerm.m_bAllGaps = false;\r
+       PPTerm.m_LL = 1;\r
+       PPTerm.m_LG = 0;\r
+       PPTerm.m_GL = 0;\r
+       PPTerm.m_GG = 0;\r
+       PPTerm.m_fOcc = 1;\r
+       return true;\r
+       }\r
+static bool PPTermInitialized = InitializePPTerm();\r
+\r
+static SCORE ScoreProfPosDimerLE(const ProfPos &PPA, const ProfPos &PPB)\r
+       {\r
+       SCORE Score = 0;\r
+       for (unsigned n = 0; n < 20; ++n)\r
+               {\r
+               const unsigned uLetter = PPA.m_uSortOrder[n];\r
+               const FCOUNT fcLetter = PPA.m_fcCounts[uLetter];\r
+               if (0 == fcLetter)\r
+                       break;\r
+               Score += fcLetter*PPB.m_AAScores[uLetter];\r
+               }\r
+       if (0 == Score)\r
+               return -2.5;\r
+       SCORE logScore = logf(Score);\r
+       return (SCORE) (logScore*(PPA.m_fOcc * PPB.m_fOcc));\r
+       }\r
+\r
+static SCORE ScoreProfPosDimerPSP(const ProfPos &PPA, const ProfPos &PPB)\r
+       {\r
+       SCORE Score = 0;\r
+       for (unsigned n = 0; n < 20; ++n)\r
+               {\r
+               const unsigned uLetter = PPA.m_uSortOrder[n];\r
+               const FCOUNT fcLetter = PPA.m_fcCounts[uLetter];\r
+               if (0 == fcLetter)\r
+                       break;\r
+               Score += fcLetter*PPB.m_AAScores[uLetter];\r
+               }\r
+       return Score;\r
+       }\r
+\r
+static SCORE ScoreProfPosDimer(const ProfPos &PPA, const ProfPos &PPB)\r
+       {\r
+       switch (g_PPScore)\r
+               {\r
+       case PPSCORE_LE:\r
+               return ScoreProfPosDimerLE(PPA, PPB);\r
+\r
+       case PPSCORE_SP:\r
+       case PPSCORE_SV:\r
+               return ScoreProfPosDimerPSP(PPA, PPB);\r
+               }\r
+       Quit("Invalid g_PPScore");\r
+       return 0;\r
+       }\r
+\r
+// Global alignment dynamic programming\r
+// This variant optimizes the profile-profile SP score under the\r
+// dimer approximation.\r
+SCORE GlobalAlignDimer(const ProfPos *PA, unsigned uLengthA, const ProfPos *PB,\r
+  unsigned uLengthB, PWPath &Path)\r
+       {\r
+       assert(uLengthB > 0 && uLengthA > 0);\r
+\r
+       const unsigned uPrefixCountA = uLengthA + 1;\r
+       const unsigned uPrefixCountB = uLengthB + 1;\r
+\r
+// Allocate DP matrices\r
+       const size_t LM = uPrefixCountA*uPrefixCountB;\r
+       SCORE *DPM_ = new SCORE[LM];\r
+       SCORE *DPD_ = new SCORE[LM];\r
+       SCORE *DPI_ = new SCORE[LM];\r
+\r
+       char *TBM_ = new char[LM];\r
+       char *TBD_ = new char[LM];\r
+       char *TBI_ = new char[LM];\r
+\r
+       DPM(0, 0) = 0;\r
+       DPD(0, 0) = MINUS_INFINITY;\r
+       DPI(0, 0) = MINUS_INFINITY;\r
+\r
+       TBM(0, 0) = 'S';\r
+       TBD(0, 0) = '?';\r
+       TBI(0, 0) = '?';\r
+\r
+       DPM(1, 0) = MINUS_INFINITY;\r
+       DPD(1, 0) = GapScoreMD(PA[0], PPTerm);\r
+       DPI(1, 0) = MINUS_INFINITY;\r
+\r
+       TBM(1, 0) = '?';\r
+       TBD(1, 0) = 'S';\r
+       TBI(1, 0) = '?';\r
+\r
+       DPM(0, 1) = MINUS_INFINITY;\r
+       DPD(0, 1) = MINUS_INFINITY;\r
+       DPI(0, 1) = GapScoreMI(PPTerm, PB[0]);\r
+\r
+       TBM(0, 1) = '?';\r
+       TBD(0, 1) = '?';\r
+       TBI(0, 1) = 'S';\r
+\r
+// Empty prefix of B is special case\r
+       for (unsigned uPrefixLengthA = 2; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)\r
+               {\r
+       // M=LetterA+LetterB, impossible with empty prefix\r
+               DPM(uPrefixLengthA, 0) = MINUS_INFINITY;\r
+               TBM(uPrefixLengthA, 0) = '?';\r
+\r
+       // D=LetterA+GapB\r
+               DPD(uPrefixLengthA, 0) = DPD(uPrefixLengthA - 1, 0) +\r
+                 GapScoreDD(PA[uPrefixLengthA - 1], PPTerm);\r
+               TBD(uPrefixLengthA, 0) = 'D';\r
+\r
+       // I=GapA+LetterB, impossible with empty prefix\r
+               DPI(uPrefixLengthA, 0) = MINUS_INFINITY;\r
+               TBI(uPrefixLengthA, 0) = '?';\r
+               }\r
+\r
+// Empty prefix of A is special case\r
+       for (unsigned uPrefixLengthB = 2; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)\r
+               {\r
+       // M=LetterA+LetterB, impossible with empty prefix\r
+               DPM(0, uPrefixLengthB) = MINUS_INFINITY;\r
+               TBM(0, uPrefixLengthB) = '?';\r
+\r
+       // D=LetterA+GapB, impossible with empty prefix\r
+               DPD(0, uPrefixLengthB) = MINUS_INFINITY;\r
+               TBD(0, uPrefixLengthB) = '?';\r
+\r
+       // I=GapA+LetterB\r
+               DPI(0, uPrefixLengthB) = DPI(0, uPrefixLengthB - 1) +\r
+                 GapScoreII(PPTerm, PB[uPrefixLengthB - 1]);\r
+               TBI(0, uPrefixLengthB) = 'I';\r
+               }\r
+\r
+// ============\r
+// Main DP loop\r
+// ============\r
+       for (unsigned uPrefixLengthB = 1; uPrefixLengthB < uPrefixCountB; ++uPrefixLengthB)\r
+               {\r
+               const ProfPos &PPB = PB[uPrefixLengthB - 1];\r
+               for (unsigned uPrefixLengthA = 1; uPrefixLengthA < uPrefixCountA; ++uPrefixLengthA)\r
+                       {\r
+                       const ProfPos &PPA = PA[uPrefixLengthA - 1];\r
+                       {\r
+               // Match M=LetterA+LetterB\r
+                       SCORE scoreLL = ScoreProfPosDimer(PPA, PPB);\r
+\r
+                       SCORE scoreMM = DPM(uPrefixLengthA-1, uPrefixLengthB-1) + GapScoreMM(PPA, PPB);\r
+                       SCORE scoreDM = DPD(uPrefixLengthA-1, uPrefixLengthB-1) + GapScoreDM(PPA, PPB);\r
+                       SCORE scoreIM = DPI(uPrefixLengthA-1, uPrefixLengthB-1) + GapScoreIM(PPA, PPB);\r
+\r
+                       SCORE scoreBest = scoreMM;\r
+                       char c = 'M';\r
+                       if (scoreDM > scoreBest)\r
+                               {\r
+                               scoreBest = scoreDM;\r
+                               c = 'D';\r
+                               }\r
+                       if (scoreIM > scoreBest)\r
+                               {\r
+                               scoreBest = scoreIM;\r
+                               c = 'I';\r
+                               }\r
+\r
+                       DPM(uPrefixLengthA, uPrefixLengthB) = scoreBest + scoreLL;\r
+                       TBM(uPrefixLengthA, uPrefixLengthB) = c;\r
+                       }\r
+                       {\r
+               // Delete D=LetterA+GapB\r
+                       SCORE scoreMD = DPM(uPrefixLengthA-1, uPrefixLengthB) + GapScoreMD(PPA, PPB);\r
+                       SCORE scoreDD = DPD(uPrefixLengthA-1, uPrefixLengthB) + GapScoreDD(PPA, PPB);\r
+                       SCORE scoreID = DPI(uPrefixLengthA-1, uPrefixLengthB) + GapScoreID(PPA, PPB);\r
+\r
+                       SCORE scoreBest = scoreMD;\r
+                       char c = 'M';\r
+                       if (scoreDD > scoreBest)\r
+                               {\r
+                               scoreBest = scoreDD;\r
+                               c = 'D';\r
+                               }\r
+                       if (scoreID > scoreBest)\r
+                               {\r
+                               scoreBest = scoreID;\r
+                               c = 'I';\r
+                               }\r
+\r
+                       DPD(uPrefixLengthA, uPrefixLengthB) = scoreBest;\r
+                       TBD(uPrefixLengthA, uPrefixLengthB) = c;\r
+                       }\r
+                       {\r
+               // Insert I=GapA+LetterB\r
+                       SCORE scoreMI = DPM(uPrefixLengthA, uPrefixLengthB-1) + GapScoreMI(PPA, PPB);\r
+                       SCORE scoreDI = DPD(uPrefixLengthA, uPrefixLengthB-1) + GapScoreDI(PPA, PPB);\r
+                       SCORE scoreII = DPI(uPrefixLengthA, uPrefixLengthB-1) + GapScoreII(PPA, PPB);\r
+\r
+                       SCORE scoreBest = scoreMI;\r
+                       char c = 'M';\r
+                       if (scoreDI > scoreBest)\r
+                               {\r
+                               scoreBest = scoreDI;\r
+                               c = 'D';\r
+                               }\r
+                       if (scoreII > scoreBest)\r
+                               {\r
+                               scoreBest = scoreII;\r
+                               c = 'I';\r
+                               }\r
+\r
+                       DPI(uPrefixLengthA, uPrefixLengthB) = scoreBest;\r
+                       TBI(uPrefixLengthA, uPrefixLengthB) = c;\r
+                       }\r
+                       }\r
+               }\r
+\r
+#if TRACE\r
+       Log("DPM:\n");\r
+       ListDP(DPM_, PA, PB, uPrefixCountA, uPrefixCountB);\r
+       Log("DPD:\n");\r
+       ListDP(DPD_, PA, PB, uPrefixCountA, uPrefixCountB);\r
+       Log("DPI:\n");\r
+       ListDP(DPI_, PA, PB, uPrefixCountA, uPrefixCountB);\r
+       Log("TBM:\n");\r
+       ListTB(TBM_, PA, PB, uPrefixCountA, uPrefixCountB);\r
+       Log("TBD:\n");\r
+       ListTB(TBD_, PA, PB, uPrefixCountA, uPrefixCountB);\r
+       Log("TBI:\n");\r
+       ListTB(TBI_, PA, PB, uPrefixCountA, uPrefixCountB);\r
+#endif\r
+\r
+       SCORE Score = TraceBackDimer(DPM_, DPD_, DPI_, TBM_, TBD_, TBI_,\r
+         uLengthA, uLengthB, Path);\r
+\r
+#if    TRACE\r
+       Log("GlobalAlignDimer score = %.3g\n", Score);\r
+#endif\r
+\r
+       delete[] DPM_;\r
+       delete[] DPD_;\r
+       delete[] DPI_;\r
+\r
+       delete[] TBM_;\r
+       delete[] TBD_;\r
+       delete[] TBI_;\r
+\r
+       return Score;\r
+       }\r
+\r
+static SCORE TraceBackDimer(  const SCORE *DPM_, const SCORE *DPD_, const SCORE *DPI_,\r
+  const char *TBM_, const char *TBD_, const char *TBI_,\r
+  unsigned uLengthA, unsigned uLengthB, PWPath &Path)\r
+       {\r
+       const unsigned uPrefixCountA = uLengthA + 1;\r
+\r
+       unsigned uPrefixLengthA = uLengthA;\r
+       unsigned uPrefixLengthB = uLengthB;\r
+\r
+       char cEdge = 'M';\r
+       SCORE scoreMax = DPM(uLengthA, uLengthB);\r
+       if (DPD(uLengthA, uLengthB) > scoreMax)\r
+               {\r
+               scoreMax = DPD(uLengthA, uLengthB);\r
+               cEdge = 'D';\r
+               }\r
+       if (DPI(uLengthA, uLengthB) > scoreMax)\r
+               {\r
+               scoreMax = DPI(uLengthA, uLengthB);\r
+               cEdge = 'I';\r
+               }\r
+\r
+       for (;;)\r
+               {\r
+               if (0 == uPrefixLengthA && 0 == uPrefixLengthB)\r
+                       break;\r
+\r
+               PWEdge Edge;\r
+               Edge.cType = cEdge;\r
+               Edge.uPrefixLengthA = uPrefixLengthA;\r
+               Edge.uPrefixLengthB = uPrefixLengthB;\r
+               Path.PrependEdge(Edge);\r
+\r
+#if TRACE\r
+               Log("PLA=%u PLB=%u Edge=%c\n", uPrefixLengthA, uPrefixLengthB, cEdge);\r
+#endif\r
+               switch (cEdge)\r
+                       {\r
+               case 'M':\r
+                       assert(uPrefixLengthA > 0 && uPrefixLengthB > 0);\r
+                       cEdge = TBM(uPrefixLengthA, uPrefixLengthB);\r
+                       --uPrefixLengthA;\r
+                       --uPrefixLengthB;\r
+                       break;\r
+               case 'D':\r
+                       assert(uPrefixLengthA > 0);\r
+                       cEdge = TBD(uPrefixLengthA, uPrefixLengthB);\r
+                       --uPrefixLengthA;\r
+                       break;\r
+               case 'I':\r
+                       assert(uPrefixLengthB > 0);\r
+                       cEdge = TBI(uPrefixLengthA, uPrefixLengthB);\r
+                       --uPrefixLengthB;\r
+                       break;\r
+               default:\r
+                       Quit("Invalid edge PLA=%u PLB=%u %c", uPrefixLengthA, uPrefixLengthB, cEdge);\r
+                       }\r
+               }\r
+#if    TRACE\r
+       Path.LogMe();\r
+#endif\r
+       return scoreMax;\r
+       }\r