Mac binaries
[jabaws.git] / website / archive / binaries / mac / src / muscle / objscore2.cpp
diff --git a/website/archive/binaries/mac/src/muscle/objscore2.cpp b/website/archive/binaries/mac/src/muscle/objscore2.cpp
new file mode 100644 (file)
index 0000000..bac5b5a
--- /dev/null
@@ -0,0 +1,522 @@
+#include "muscle.h"\r
+#include "msa.h"\r
+#include "profile.h"\r
+#include "objscore.h"\r
+\r
+#define TRACE                  0\r
+#define TRACE_SEQPAIR  0\r
+#define TEST_SPFAST            0\r
+\r
+extern SCOREMATRIX VTML_LA;\r
+extern SCOREMATRIX PAM200;\r
+extern SCOREMATRIX PAM200NoCenter;\r
+extern SCOREMATRIX VTML_SP;\r
+extern SCOREMATRIX VTML_SPNoCenter;\r
+extern SCOREMATRIX NUC_SP;\r
+\r
+SCORE g_SPScoreLetters;\r
+SCORE g_SPScoreGaps;\r
+\r
+static SCORE TermGapScore(bool Gap)\r
+       {\r
+       switch (g_TermGaps)\r
+               {\r
+       case TERMGAPS_Full:\r
+               return 0;\r
+\r
+       case TERMGAPS_Half:\r
+               if (Gap)\r
+                       return g_scoreGapOpen/2;\r
+               return 0;\r
+\r
+       case TERMGAPS_Ext:\r
+               if (Gap)\r
+                       return g_scoreGapExtend;\r
+               return 0;\r
+               }\r
+       Quit("TermGapScore?!");\r
+       return 0;\r
+       }\r
+\r
+SCORE ScoreSeqPairLetters(const MSA &msa1, unsigned uSeqIndex1,\r
+  const MSA &msa2, unsigned uSeqIndex2)\r
+       {\r
+       const unsigned uColCount = msa1.GetColCount();\r
+       const unsigned uColCount2 = msa2.GetColCount();\r
+       if (uColCount != uColCount2)\r
+               Quit("ScoreSeqPairLetters, different lengths");\r
+\r
+#if    TRACE_SEQPAIR\r
+       {\r
+       Log("\n");\r
+       Log("ScoreSeqPairLetters\n");\r
+       MSA msaTmp;\r
+       msaTmp.SetSize(2, uColCount);\r
+       msaTmp.CopySeq(0, msa1, uSeqIndex1);\r
+       msaTmp.CopySeq(1, msa2, uSeqIndex2);\r
+       msaTmp.LogMe();\r
+       }\r
+#endif\r
+\r
+       SCORE scoreLetters = 0;\r
+       SCORE scoreGaps = 0;\r
+       bool bGapping1 = false;\r
+       bool bGapping2 = false;\r
+\r
+       unsigned uColStart = 0;\r
+       bool bLeftTermGap = false;\r
+       for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)\r
+               {\r
+               bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);\r
+               bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);\r
+               if (!bGap1 || !bGap2)\r
+                       {\r
+                       if (bGap1 || bGap2)\r
+                               bLeftTermGap = true;\r
+                       uColStart = uColIndex;\r
+                       break;\r
+                       }\r
+               }\r
+\r
+       unsigned uColEnd = uColCount - 1;\r
+       bool bRightTermGap = false;\r
+       for (int iColIndex = (int) uColCount - 1; iColIndex >= 0; --iColIndex)\r
+               {\r
+               bool bGap1 = msa1.IsGap(uSeqIndex1, iColIndex);\r
+               bool bGap2 = msa2.IsGap(uSeqIndex2, iColIndex);\r
+               if (!bGap1 || !bGap2)\r
+                       {\r
+                       if (bGap1 || bGap2)\r
+                               bRightTermGap = true;\r
+                       uColEnd = (unsigned) iColIndex;\r
+                       break;\r
+                       }\r
+               }\r
+\r
+#if    TRACE_SEQPAIR\r
+       Log("LeftTermGap=%d RightTermGap=%d\n", bLeftTermGap, bRightTermGap);\r
+#endif\r
+\r
+       for (unsigned uColIndex = uColStart; uColIndex <= uColEnd; ++uColIndex)\r
+               {\r
+               unsigned uLetter1 = msa1.GetLetterEx(uSeqIndex1, uColIndex);\r
+               if (uLetter1 >= g_AlphaSize)\r
+                       continue;\r
+               unsigned uLetter2 = msa2.GetLetterEx(uSeqIndex2, uColIndex);\r
+               if (uLetter2 >= g_AlphaSize)\r
+                       continue;\r
+\r
+               SCORE scoreMatch = (*g_ptrScoreMatrix)[uLetter1][uLetter2];\r
+               scoreLetters += scoreMatch;\r
+               }\r
+       return scoreLetters;\r
+       }\r
+\r
+SCORE ScoreSeqPairGaps(const MSA &msa1, unsigned uSeqIndex1,\r
+  const MSA &msa2, unsigned uSeqIndex2)\r
+       {\r
+       const unsigned uColCount = msa1.GetColCount();\r
+       const unsigned uColCount2 = msa2.GetColCount();\r
+       if (uColCount != uColCount2)\r
+               Quit("ScoreSeqPairGaps, different lengths");\r
+\r
+#if    TRACE_SEQPAIR\r
+       {\r
+       Log("\n");\r
+       Log("ScoreSeqPairGaps\n");\r
+       MSA msaTmp;\r
+       msaTmp.SetSize(2, uColCount);\r
+       msaTmp.CopySeq(0, msa1, uSeqIndex1);\r
+       msaTmp.CopySeq(1, msa2, uSeqIndex2);\r
+       msaTmp.LogMe();\r
+       }\r
+#endif\r
+\r
+       SCORE scoreGaps = 0;\r
+       bool bGapping1 = false;\r
+       bool bGapping2 = false;\r
+\r
+       unsigned uColStart = 0;\r
+       bool bLeftTermGap = false;\r
+       for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)\r
+               {\r
+               bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);\r
+               bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);\r
+               if (!bGap1 || !bGap2)\r
+                       {\r
+                       if (bGap1 || bGap2)\r
+                               bLeftTermGap = true;\r
+                       uColStart = uColIndex;\r
+                       break;\r
+                       }\r
+               }\r
+\r
+       unsigned uColEnd = uColCount - 1;\r
+       bool bRightTermGap = false;\r
+       for (int iColIndex = (int) uColCount - 1; iColIndex >= 0; --iColIndex)\r
+               {\r
+               bool bGap1 = msa1.IsGap(uSeqIndex1, iColIndex);\r
+               bool bGap2 = msa2.IsGap(uSeqIndex2, iColIndex);\r
+               if (!bGap1 || !bGap2)\r
+                       {\r
+                       if (bGap1 || bGap2)\r
+                               bRightTermGap = true;\r
+                       uColEnd = (unsigned) iColIndex;\r
+                       break;\r
+                       }\r
+               }\r
+\r
+#if    TRACE_SEQPAIR\r
+       Log("LeftTermGap=%d RightTermGap=%d\n", bLeftTermGap, bRightTermGap);\r
+#endif\r
+\r
+       for (unsigned uColIndex = uColStart; uColIndex <= uColEnd; ++uColIndex)\r
+               {\r
+               bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);\r
+               bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);\r
+\r
+               if (bGap1 && bGap2)\r
+                       continue;\r
+\r
+               if (bGap1)\r
+                       {\r
+                       if (!bGapping1)\r
+                               {\r
+#if    TRACE_SEQPAIR\r
+                               Log("Gap open seq 1 col %d\n", uColIndex);\r
+#endif\r
+                               if (uColIndex == uColStart)\r
+                                       scoreGaps += TermGapScore(true);\r
+                               else\r
+                                       scoreGaps += g_scoreGapOpen;\r
+                               bGapping1 = true;\r
+                               }\r
+                       else\r
+                               scoreGaps += g_scoreGapExtend;\r
+                       continue;\r
+                       }\r
+\r
+               else if (bGap2)\r
+                       {\r
+                       if (!bGapping2)\r
+                               {\r
+#if    TRACE_SEQPAIR\r
+                               Log("Gap open seq 2 col %d\n", uColIndex);\r
+#endif\r
+                               if (uColIndex == uColStart)\r
+                                       scoreGaps += TermGapScore(true);\r
+                               else\r
+                                       scoreGaps += g_scoreGapOpen;\r
+                               bGapping2 = true;\r
+                               }\r
+                       else\r
+                               scoreGaps += g_scoreGapExtend;\r
+                       continue;\r
+                       }\r
+\r
+               bGapping1 = false;\r
+               bGapping2 = false;\r
+               }\r
+\r
+       if (bGapping1 || bGapping2)\r
+               {\r
+               scoreGaps -= g_scoreGapOpen;\r
+               scoreGaps += TermGapScore(true);\r
+               }\r
+       return scoreGaps;\r
+       }\r
+\r
+// The usual sum-of-pairs objective score: sum the score\r
+// of the alignment of each pair of sequences.\r
+SCORE ObjScoreSP(const MSA &msa, SCORE MatchScore[])\r
+       {\r
+#if    TRACE\r
+       Log("==================ObjScoreSP==============\n");\r
+       Log("msa=\n");\r
+       msa.LogMe();\r
+#endif\r
+       g_SPScoreLetters = 0;\r
+       g_SPScoreGaps = 0;\r
+\r
+       if (0 != MatchScore)\r
+               {\r
+               const unsigned uColCount = msa.GetColCount();\r
+               for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)\r
+                       MatchScore[uColIndex] = 0;\r
+               }\r
+\r
+       const unsigned uSeqCount = msa.GetSeqCount();\r
+       SCORE scoreTotal = 0;\r
+       unsigned uPairCount = 0;\r
+#if    TRACE\r
+       Log("Seq1  Seq2     wt1     wt2    Letters         Gaps  Unwt.Score    Wt.Score       Total\n");\r
+       Log("----  ----  ------  ------  ----------  ----------  ----------  ----------  ----------\n");\r
+#endif\r
+       for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1)\r
+               {\r
+               const WEIGHT w1 = msa.GetSeqWeight(uSeqIndex1);\r
+               for (unsigned uSeqIndex2 = uSeqIndex1 + 1; uSeqIndex2 < uSeqCount; ++uSeqIndex2)\r
+                       {\r
+                       const WEIGHT w2 = msa.GetSeqWeight(uSeqIndex2);\r
+                       const WEIGHT w = w1*w2;\r
+\r
+                       SCORE scoreLetters = ScoreSeqPairLetters(msa, uSeqIndex1, msa, uSeqIndex2);\r
+                       SCORE scoreGaps = ScoreSeqPairGaps(msa, uSeqIndex1, msa, uSeqIndex2);\r
+                       SCORE scorePair = scoreLetters + scoreGaps;\r
+                       ++uPairCount;\r
+\r
+                       scoreTotal += w*scorePair;\r
+\r
+                       g_SPScoreLetters += w*scoreLetters;\r
+                       g_SPScoreGaps += w*scoreGaps;\r
+#if    TRACE\r
+                       Log("%4d  %4d  %6.3f  %6.3f  %10.2f  %10.2f  %10.2f  %10.2f  %10.2f >%s >%s\n",\r
+                         uSeqIndex1,\r
+                         uSeqIndex2,\r
+                         w1,\r
+                         w2,\r
+                         scoreLetters,\r
+                         scoreGaps,\r
+                         scorePair,\r
+                         scorePair*w1*w2,\r
+                         scoreTotal,\r
+                         msa.GetSeqName(uSeqIndex1),\r
+                         msa.GetSeqName(uSeqIndex2));\r
+#endif\r
+                       }\r
+               }\r
+#if    TEST_SPFAST\r
+       {\r
+       SCORE f = ObjScoreSPFast(msa);\r
+       Log("Fast  = %.6g\n", f);\r
+       Log("Brute = %.6g\n", scoreTotal);\r
+       if (BTEq(f, scoreTotal))\r
+               Log("Agree\n");\r
+       else\r
+               Log("** DISAGREE **\n");\r
+       }\r
+#endif\r
+//     return scoreTotal / uPairCount;\r
+       return scoreTotal;\r
+       }\r
+\r
+// Objective score defined as the dynamic programming score.\r
+// Input is two alignments, which must be of the same length.\r
+// Result is the same profile-profile score that is optimized\r
+// by dynamic programming.\r
+SCORE ObjScoreDP(const MSA &msa1, const MSA &msa2, SCORE MatchScore[])\r
+       {\r
+       const unsigned uColCount = msa1.GetColCount();\r
+       if (msa2.GetColCount() != uColCount)\r
+               Quit("ObjScoreDP, must be same length");\r
+\r
+       const unsigned uColCount1 = msa1.GetColCount();\r
+       const unsigned uColCount2 = msa2.GetColCount();\r
+\r
+       const ProfPos *PA = ProfileFromMSA(msa1);\r
+       const ProfPos *PB = ProfileFromMSA(msa2);\r
+\r
+       return ObjScoreDP_Profs(PA, PB, uColCount1, MatchScore);\r
+       }\r
+\r
+SCORE ObjScoreDP_Profs(const ProfPos *PA, const ProfPos *PB, unsigned uColCount,\r
+  SCORE MatchScore[])\r
+       {\r
+//#if  TRACE\r
+//     Log("Profile 1:\n");\r
+//     ListProfile(PA, uColCount, &msa1);\r
+//\r
+//     Log("Profile 2:\n");\r
+//     ListProfile(PB, uColCount, &msa2);\r
+//#endif\r
+\r
+       SCORE scoreTotal = 0;\r
+\r
+       for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)\r
+               {\r
+               const ProfPos &PPA = PA[uColIndex];\r
+               const ProfPos &PPB = PB[uColIndex];\r
+\r
+               SCORE scoreGap = 0;\r
+               SCORE scoreMatch = 0;\r
+       // If gapped column...\r
+               if (PPA.m_bAllGaps && PPB.m_bAllGaps)\r
+                       scoreGap = 0;\r
+               else if (PPA.m_bAllGaps)\r
+                       {\r
+                       if (uColCount - 1 == uColIndex || !PA[uColIndex+1].m_bAllGaps)\r
+                               scoreGap = PPB.m_scoreGapClose;\r
+                       if (0 == uColIndex || !PA[uColIndex-1].m_bAllGaps)\r
+                               scoreGap += PPB.m_scoreGapOpen;\r
+                       //if (0 == scoreGap)\r
+                       //      scoreGap = PPB.m_scoreGapExtend;\r
+                       }\r
+               else if (PPB.m_bAllGaps)\r
+                       {\r
+                       if (uColCount - 1 == uColIndex || !PB[uColIndex+1].m_bAllGaps)\r
+                               scoreGap = PPA.m_scoreGapClose;\r
+                       if (0 == uColIndex || !PB[uColIndex-1].m_bAllGaps)\r
+                               scoreGap += PPA.m_scoreGapOpen;\r
+                       //if (0 == scoreGap)\r
+                       //      scoreGap = PPA.m_scoreGapExtend;\r
+                       }\r
+               else\r
+                       scoreMatch = ScoreProfPos2(PPA, PPB);\r
+\r
+               if (0 != MatchScore)\r
+                       MatchScore[uColIndex] = scoreMatch;\r
+\r
+               scoreTotal += scoreMatch + scoreGap;\r
+\r
+               extern bool g_bTracePPScore;\r
+               extern MSA *g_ptrPPScoreMSA1;\r
+               extern MSA *g_ptrPPScoreMSA2;\r
+               if (g_bTracePPScore)\r
+                       {\r
+                       const MSA &msa1 = *g_ptrPPScoreMSA1;\r
+                       const MSA &msa2 = *g_ptrPPScoreMSA2;\r
+                       const unsigned uSeqCount1 = msa1.GetSeqCount();\r
+                       const unsigned uSeqCount2 = msa2.GetSeqCount();\r
+\r
+                       for (unsigned n = 0; n < uSeqCount1; ++n)\r
+                               Log("%c", msa1.GetChar(n, uColIndex));\r
+                       Log("  ");\r
+                       for (unsigned n = 0; n < uSeqCount2; ++n)\r
+                               Log("%c", msa2.GetChar(n, uColIndex));\r
+                       Log("  %10.3f", scoreMatch);\r
+                       if (scoreGap != 0)\r
+                               Log("  %10.3f", scoreGap);\r
+                       Log("\n");\r
+                       }\r
+               }\r
+\r
+       delete[] PA;\r
+       delete[] PB;\r
+\r
+       return scoreTotal;\r
+       }\r
+\r
+// Objective score defined as the sum of profile-sequence\r
+// scores for each sequence in the alignment. The profile\r
+// is computed from the entire alignment, so this includes\r
+// the score of each sequence against itself. This is to\r
+// avoid recomputing the profile each time, so we reduce\r
+// complexity but introduce a questionable approximation.\r
+// The goal is to see if we can exploit the apparent\r
+// improvement in performance of log-expectation score\r
+// over the usual sum-of-pairs by optimizing this\r
+// objective score in the iterative refinement stage.\r
+SCORE ObjScorePS(const MSA &msa, SCORE MatchScore[])\r
+       {\r
+       if (g_PPScore != PPSCORE_LE)\r
+               Quit("FastScoreMSA_LASimple: LA");\r
+\r
+       const unsigned uSeqCount = msa.GetSeqCount();\r
+       const unsigned uColCount = msa.GetColCount();\r
+\r
+       const ProfPos *Prof = ProfileFromMSA(msa);\r
+\r
+       if (0 != MatchScore)\r
+               for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)\r
+                       MatchScore[uColIndex] = 0;\r
+\r
+       SCORE scoreTotal = 0;\r
+       for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
+               {\r
+               const WEIGHT weightSeq = msa.GetSeqWeight(uSeqIndex);\r
+               SCORE scoreSeq = 0;\r
+               for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)\r
+                       {\r
+                       const ProfPos &PP = Prof[uColIndex];\r
+                       if (msa.IsGap(uSeqIndex, uColIndex))\r
+                               {\r
+                               bool bOpen = (0 == uColIndex ||\r
+                                 !msa.IsGap(uSeqIndex, uColIndex - 1));\r
+                               bool bClose = (uColCount - 1 == uColIndex ||\r
+                                 !msa.IsGap(uSeqIndex, uColIndex + 1));\r
+\r
+                               if (bOpen)\r
+                                       scoreSeq += PP.m_scoreGapOpen;\r
+                               if (bClose)\r
+                                       scoreSeq += PP.m_scoreGapClose;\r
+                               //if (!bOpen && !bClose)\r
+                               //      scoreSeq += PP.m_scoreGapExtend;\r
+                               }\r
+                       else if (msa.IsWildcard(uSeqIndex, uColIndex))\r
+                               continue;\r
+                       else\r
+                               {\r
+                               unsigned uLetter = msa.GetLetter(uSeqIndex, uColIndex);\r
+                               const SCORE scoreMatch = PP.m_AAScores[uLetter];\r
+                               if (0 != MatchScore)\r
+                                       MatchScore[uColIndex] += weightSeq*scoreMatch;\r
+                               scoreSeq += scoreMatch;\r
+                               }\r
+                       }\r
+               scoreTotal += weightSeq*scoreSeq;\r
+               }\r
+\r
+       delete[] Prof;\r
+       return scoreTotal;\r
+       }\r
+\r
+// The XP score is the sum of the score of each pair of\r
+// sequences between two profiles which are aligned to\r
+// each other. Notice that for two given profiles aligned\r
+// in different ways, the difference in XP score must be\r
+// the same as the difference in SP score because the\r
+// score of a pair of sequences in one profile doesn't\r
+// depend on the alignment.\r
+SCORE ObjScoreXP(const MSA &msa1, const MSA &msa2)\r
+       {\r
+       const unsigned uColCount1 = msa1.GetColCount();\r
+       const unsigned uColCount2 = msa2.GetColCount();\r
+       if (uColCount1 != uColCount2)\r
+               Quit("ObjScoreXP, alignment lengths differ %u %u", uColCount1, uColCount2);\r
+\r
+       const unsigned uSeqCount1 = msa1.GetSeqCount();\r
+       const unsigned uSeqCount2 = msa2.GetSeqCount();\r
+\r
+#if    TRACE\r
+       Log("     Score  Weight  Weight       Total\n");\r
+       Log("----------  ------  ------  ----------\n");\r
+#endif\r
+\r
+       SCORE scoreTotal = 0;\r
+       unsigned uPairCount = 0;\r
+       for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount1; ++uSeqIndex1)\r
+               {\r
+               const WEIGHT w1 = msa1.GetSeqWeight(uSeqIndex1);\r
+               for (unsigned uSeqIndex2 = 0; uSeqIndex2 < uSeqCount2; ++uSeqIndex2)\r
+                       {\r
+                       const WEIGHT w2 = msa2.GetSeqWeight(uSeqIndex2);\r
+                       const WEIGHT w = w1*w2;\r
+                       SCORE scoreLetters = ScoreSeqPairLetters(msa1, uSeqIndex1, msa2, uSeqIndex2);\r
+                       SCORE scoreGaps = ScoreSeqPairGaps(msa1, uSeqIndex1, msa2, uSeqIndex2);\r
+                       SCORE scorePair = scoreLetters + scoreGaps;\r
+                       scoreTotal += w1*w2*scorePair;\r
+                       ++uPairCount;\r
+#if    TRACE\r
+                       Log("%10.2f  %6.3f  %6.3f  %10.2f  >%s >%s\n",\r
+                         scorePair,\r
+                         w1,\r
+                         w2,\r
+                         scorePair*w1*w2,\r
+                         msa1.GetSeqName(uSeqIndex1),\r
+                         msa2.GetSeqName(uSeqIndex2));\r
+#endif\r
+                       }\r
+               }\r
+       if (0 == uPairCount)\r
+               Quit("0 == uPairCount");\r
+\r
+#if    TRACE\r
+       Log("msa1=\n");\r
+       msa1.LogMe();\r
+       Log("msa2=\n");\r
+       msa2.LogMe();\r
+       Log("XP=%g\n", scoreTotal);\r
+#endif\r
+//     return scoreTotal / uPairCount;\r
+       return scoreTotal;\r
+       }\r