+++ /dev/null
-#include "muscle.h"\r
-#include "msa.h"\r
-#include "profile.h"\r
-#include "objscore.h"\r
-\r
-#define TRACE 0\r
-#define TRACE_SEQPAIR 0\r
-#define TEST_SPFAST 0\r
-\r
-extern SCOREMATRIX VTML_LA;\r
-extern SCOREMATRIX PAM200;\r
-extern SCOREMATRIX PAM200NoCenter;\r
-extern SCOREMATRIX VTML_SP;\r
-extern SCOREMATRIX VTML_SPNoCenter;\r
-extern SCOREMATRIX NUC_SP;\r
-\r
-SCORE g_SPScoreLetters;\r
-SCORE g_SPScoreGaps;\r
-\r
-static SCORE TermGapScore(bool Gap)\r
- {\r
- switch (g_TermGaps)\r
- {\r
- case TERMGAPS_Full:\r
- return 0;\r
-\r
- case TERMGAPS_Half:\r
- if (Gap)\r
- return g_scoreGapOpen/2;\r
- return 0;\r
-\r
- case TERMGAPS_Ext:\r
- if (Gap)\r
- return g_scoreGapExtend;\r
- return 0;\r
- }\r
- Quit("TermGapScore?!");\r
- return 0;\r
- }\r
-\r
-SCORE ScoreSeqPairLetters(const MSA &msa1, unsigned uSeqIndex1,\r
- const MSA &msa2, unsigned uSeqIndex2)\r
- {\r
- const unsigned uColCount = msa1.GetColCount();\r
- const unsigned uColCount2 = msa2.GetColCount();\r
- if (uColCount != uColCount2)\r
- Quit("ScoreSeqPairLetters, different lengths");\r
-\r
-#if TRACE_SEQPAIR\r
- {\r
- Log("\n");\r
- Log("ScoreSeqPairLetters\n");\r
- MSA msaTmp;\r
- msaTmp.SetSize(2, uColCount);\r
- msaTmp.CopySeq(0, msa1, uSeqIndex1);\r
- msaTmp.CopySeq(1, msa2, uSeqIndex2);\r
- msaTmp.LogMe();\r
- }\r
-#endif\r
-\r
- SCORE scoreLetters = 0;\r
- SCORE scoreGaps = 0;\r
- bool bGapping1 = false;\r
- bool bGapping2 = false;\r
-\r
- unsigned uColStart = 0;\r
- bool bLeftTermGap = false;\r
- for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)\r
- {\r
- bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);\r
- bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);\r
- if (!bGap1 || !bGap2)\r
- {\r
- if (bGap1 || bGap2)\r
- bLeftTermGap = true;\r
- uColStart = uColIndex;\r
- break;\r
- }\r
- }\r
-\r
- unsigned uColEnd = uColCount - 1;\r
- bool bRightTermGap = false;\r
- for (int iColIndex = (int) uColCount - 1; iColIndex >= 0; --iColIndex)\r
- {\r
- bool bGap1 = msa1.IsGap(uSeqIndex1, iColIndex);\r
- bool bGap2 = msa2.IsGap(uSeqIndex2, iColIndex);\r
- if (!bGap1 || !bGap2)\r
- {\r
- if (bGap1 || bGap2)\r
- bRightTermGap = true;\r
- uColEnd = (unsigned) iColIndex;\r
- break;\r
- }\r
- }\r
-\r
-#if TRACE_SEQPAIR\r
- Log("LeftTermGap=%d RightTermGap=%d\n", bLeftTermGap, bRightTermGap);\r
-#endif\r
-\r
- for (unsigned uColIndex = uColStart; uColIndex <= uColEnd; ++uColIndex)\r
- {\r
- unsigned uLetter1 = msa1.GetLetterEx(uSeqIndex1, uColIndex);\r
- if (uLetter1 >= g_AlphaSize)\r
- continue;\r
- unsigned uLetter2 = msa2.GetLetterEx(uSeqIndex2, uColIndex);\r
- if (uLetter2 >= g_AlphaSize)\r
- continue;\r
-\r
- SCORE scoreMatch = (*g_ptrScoreMatrix)[uLetter1][uLetter2];\r
- scoreLetters += scoreMatch;\r
- }\r
- return scoreLetters;\r
- }\r
-\r
-SCORE ScoreSeqPairGaps(const MSA &msa1, unsigned uSeqIndex1,\r
- const MSA &msa2, unsigned uSeqIndex2)\r
- {\r
- const unsigned uColCount = msa1.GetColCount();\r
- const unsigned uColCount2 = msa2.GetColCount();\r
- if (uColCount != uColCount2)\r
- Quit("ScoreSeqPairGaps, different lengths");\r
-\r
-#if TRACE_SEQPAIR\r
- {\r
- Log("\n");\r
- Log("ScoreSeqPairGaps\n");\r
- MSA msaTmp;\r
- msaTmp.SetSize(2, uColCount);\r
- msaTmp.CopySeq(0, msa1, uSeqIndex1);\r
- msaTmp.CopySeq(1, msa2, uSeqIndex2);\r
- msaTmp.LogMe();\r
- }\r
-#endif\r
-\r
- SCORE scoreGaps = 0;\r
- bool bGapping1 = false;\r
- bool bGapping2 = false;\r
-\r
- unsigned uColStart = 0;\r
- bool bLeftTermGap = false;\r
- for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)\r
- {\r
- bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);\r
- bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);\r
- if (!bGap1 || !bGap2)\r
- {\r
- if (bGap1 || bGap2)\r
- bLeftTermGap = true;\r
- uColStart = uColIndex;\r
- break;\r
- }\r
- }\r
-\r
- unsigned uColEnd = uColCount - 1;\r
- bool bRightTermGap = false;\r
- for (int iColIndex = (int) uColCount - 1; iColIndex >= 0; --iColIndex)\r
- {\r
- bool bGap1 = msa1.IsGap(uSeqIndex1, iColIndex);\r
- bool bGap2 = msa2.IsGap(uSeqIndex2, iColIndex);\r
- if (!bGap1 || !bGap2)\r
- {\r
- if (bGap1 || bGap2)\r
- bRightTermGap = true;\r
- uColEnd = (unsigned) iColIndex;\r
- break;\r
- }\r
- }\r
-\r
-#if TRACE_SEQPAIR\r
- Log("LeftTermGap=%d RightTermGap=%d\n", bLeftTermGap, bRightTermGap);\r
-#endif\r
-\r
- for (unsigned uColIndex = uColStart; uColIndex <= uColEnd; ++uColIndex)\r
- {\r
- bool bGap1 = msa1.IsGap(uSeqIndex1, uColIndex);\r
- bool bGap2 = msa2.IsGap(uSeqIndex2, uColIndex);\r
-\r
- if (bGap1 && bGap2)\r
- continue;\r
-\r
- if (bGap1)\r
- {\r
- if (!bGapping1)\r
- {\r
-#if TRACE_SEQPAIR\r
- Log("Gap open seq 1 col %d\n", uColIndex);\r
-#endif\r
- if (uColIndex == uColStart)\r
- scoreGaps += TermGapScore(true);\r
- else\r
- scoreGaps += g_scoreGapOpen;\r
- bGapping1 = true;\r
- }\r
- else\r
- scoreGaps += g_scoreGapExtend;\r
- continue;\r
- }\r
-\r
- else if (bGap2)\r
- {\r
- if (!bGapping2)\r
- {\r
-#if TRACE_SEQPAIR\r
- Log("Gap open seq 2 col %d\n", uColIndex);\r
-#endif\r
- if (uColIndex == uColStart)\r
- scoreGaps += TermGapScore(true);\r
- else\r
- scoreGaps += g_scoreGapOpen;\r
- bGapping2 = true;\r
- }\r
- else\r
- scoreGaps += g_scoreGapExtend;\r
- continue;\r
- }\r
-\r
- bGapping1 = false;\r
- bGapping2 = false;\r
- }\r
-\r
- if (bGapping1 || bGapping2)\r
- {\r
- scoreGaps -= g_scoreGapOpen;\r
- scoreGaps += TermGapScore(true);\r
- }\r
- return scoreGaps;\r
- }\r
-\r
-// The usual sum-of-pairs objective score: sum the score\r
-// of the alignment of each pair of sequences.\r
-SCORE ObjScoreSP(const MSA &msa, SCORE MatchScore[])\r
- {\r
-#if TRACE\r
- Log("==================ObjScoreSP==============\n");\r
- Log("msa=\n");\r
- msa.LogMe();\r
-#endif\r
- g_SPScoreLetters = 0;\r
- g_SPScoreGaps = 0;\r
-\r
- if (0 != MatchScore)\r
- {\r
- const unsigned uColCount = msa.GetColCount();\r
- for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)\r
- MatchScore[uColIndex] = 0;\r
- }\r
-\r
- const unsigned uSeqCount = msa.GetSeqCount();\r
- SCORE scoreTotal = 0;\r
- unsigned uPairCount = 0;\r
-#if TRACE\r
- Log("Seq1 Seq2 wt1 wt2 Letters Gaps Unwt.Score Wt.Score Total\n");\r
- Log("---- ---- ------ ------ ---------- ---------- ---------- ---------- ----------\n");\r
-#endif\r
- for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount; ++uSeqIndex1)\r
- {\r
- const WEIGHT w1 = msa.GetSeqWeight(uSeqIndex1);\r
- for (unsigned uSeqIndex2 = uSeqIndex1 + 1; uSeqIndex2 < uSeqCount; ++uSeqIndex2)\r
- {\r
- const WEIGHT w2 = msa.GetSeqWeight(uSeqIndex2);\r
- const WEIGHT w = w1*w2;\r
-\r
- SCORE scoreLetters = ScoreSeqPairLetters(msa, uSeqIndex1, msa, uSeqIndex2);\r
- SCORE scoreGaps = ScoreSeqPairGaps(msa, uSeqIndex1, msa, uSeqIndex2);\r
- SCORE scorePair = scoreLetters + scoreGaps;\r
- ++uPairCount;\r
-\r
- scoreTotal += w*scorePair;\r
-\r
- g_SPScoreLetters += w*scoreLetters;\r
- g_SPScoreGaps += w*scoreGaps;\r
-#if TRACE\r
- Log("%4d %4d %6.3f %6.3f %10.2f %10.2f %10.2f %10.2f %10.2f >%s >%s\n",\r
- uSeqIndex1,\r
- uSeqIndex2,\r
- w1,\r
- w2,\r
- scoreLetters,\r
- scoreGaps,\r
- scorePair,\r
- scorePair*w1*w2,\r
- scoreTotal,\r
- msa.GetSeqName(uSeqIndex1),\r
- msa.GetSeqName(uSeqIndex2));\r
-#endif\r
- }\r
- }\r
-#if TEST_SPFAST\r
- {\r
- SCORE f = ObjScoreSPFast(msa);\r
- Log("Fast = %.6g\n", f);\r
- Log("Brute = %.6g\n", scoreTotal);\r
- if (BTEq(f, scoreTotal))\r
- Log("Agree\n");\r
- else\r
- Log("** DISAGREE **\n");\r
- }\r
-#endif\r
-// return scoreTotal / uPairCount;\r
- return scoreTotal;\r
- }\r
-\r
-// Objective score defined as the dynamic programming score.\r
-// Input is two alignments, which must be of the same length.\r
-// Result is the same profile-profile score that is optimized\r
-// by dynamic programming.\r
-SCORE ObjScoreDP(const MSA &msa1, const MSA &msa2, SCORE MatchScore[])\r
- {\r
- const unsigned uColCount = msa1.GetColCount();\r
- if (msa2.GetColCount() != uColCount)\r
- Quit("ObjScoreDP, must be same length");\r
-\r
- const unsigned uColCount1 = msa1.GetColCount();\r
- const unsigned uColCount2 = msa2.GetColCount();\r
-\r
- const ProfPos *PA = ProfileFromMSA(msa1);\r
- const ProfPos *PB = ProfileFromMSA(msa2);\r
-\r
- return ObjScoreDP_Profs(PA, PB, uColCount1, MatchScore);\r
- }\r
-\r
-SCORE ObjScoreDP_Profs(const ProfPos *PA, const ProfPos *PB, unsigned uColCount,\r
- SCORE MatchScore[])\r
- {\r
-//#if TRACE\r
-// Log("Profile 1:\n");\r
-// ListProfile(PA, uColCount, &msa1);\r
-//\r
-// Log("Profile 2:\n");\r
-// ListProfile(PB, uColCount, &msa2);\r
-//#endif\r
-\r
- SCORE scoreTotal = 0;\r
-\r
- for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)\r
- {\r
- const ProfPos &PPA = PA[uColIndex];\r
- const ProfPos &PPB = PB[uColIndex];\r
-\r
- SCORE scoreGap = 0;\r
- SCORE scoreMatch = 0;\r
- // If gapped column...\r
- if (PPA.m_bAllGaps && PPB.m_bAllGaps)\r
- scoreGap = 0;\r
- else if (PPA.m_bAllGaps)\r
- {\r
- if (uColCount - 1 == uColIndex || !PA[uColIndex+1].m_bAllGaps)\r
- scoreGap = PPB.m_scoreGapClose;\r
- if (0 == uColIndex || !PA[uColIndex-1].m_bAllGaps)\r
- scoreGap += PPB.m_scoreGapOpen;\r
- //if (0 == scoreGap)\r
- // scoreGap = PPB.m_scoreGapExtend;\r
- }\r
- else if (PPB.m_bAllGaps)\r
- {\r
- if (uColCount - 1 == uColIndex || !PB[uColIndex+1].m_bAllGaps)\r
- scoreGap = PPA.m_scoreGapClose;\r
- if (0 == uColIndex || !PB[uColIndex-1].m_bAllGaps)\r
- scoreGap += PPA.m_scoreGapOpen;\r
- //if (0 == scoreGap)\r
- // scoreGap = PPA.m_scoreGapExtend;\r
- }\r
- else\r
- scoreMatch = ScoreProfPos2(PPA, PPB);\r
-\r
- if (0 != MatchScore)\r
- MatchScore[uColIndex] = scoreMatch;\r
-\r
- scoreTotal += scoreMatch + scoreGap;\r
-\r
- extern bool g_bTracePPScore;\r
- extern MSA *g_ptrPPScoreMSA1;\r
- extern MSA *g_ptrPPScoreMSA2;\r
- if (g_bTracePPScore)\r
- {\r
- const MSA &msa1 = *g_ptrPPScoreMSA1;\r
- const MSA &msa2 = *g_ptrPPScoreMSA2;\r
- const unsigned uSeqCount1 = msa1.GetSeqCount();\r
- const unsigned uSeqCount2 = msa2.GetSeqCount();\r
-\r
- for (unsigned n = 0; n < uSeqCount1; ++n)\r
- Log("%c", msa1.GetChar(n, uColIndex));\r
- Log(" ");\r
- for (unsigned n = 0; n < uSeqCount2; ++n)\r
- Log("%c", msa2.GetChar(n, uColIndex));\r
- Log(" %10.3f", scoreMatch);\r
- if (scoreGap != 0)\r
- Log(" %10.3f", scoreGap);\r
- Log("\n");\r
- }\r
- }\r
-\r
- delete[] PA;\r
- delete[] PB;\r
-\r
- return scoreTotal;\r
- }\r
-\r
-// Objective score defined as the sum of profile-sequence\r
-// scores for each sequence in the alignment. The profile\r
-// is computed from the entire alignment, so this includes\r
-// the score of each sequence against itself. This is to\r
-// avoid recomputing the profile each time, so we reduce\r
-// complexity but introduce a questionable approximation.\r
-// The goal is to see if we can exploit the apparent\r
-// improvement in performance of log-expectation score\r
-// over the usual sum-of-pairs by optimizing this\r
-// objective score in the iterative refinement stage.\r
-SCORE ObjScorePS(const MSA &msa, SCORE MatchScore[])\r
- {\r
- if (g_PPScore != PPSCORE_LE)\r
- Quit("FastScoreMSA_LASimple: LA");\r
-\r
- const unsigned uSeqCount = msa.GetSeqCount();\r
- const unsigned uColCount = msa.GetColCount();\r
-\r
- const ProfPos *Prof = ProfileFromMSA(msa);\r
-\r
- if (0 != MatchScore)\r
- for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)\r
- MatchScore[uColIndex] = 0;\r
-\r
- SCORE scoreTotal = 0;\r
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
- {\r
- const WEIGHT weightSeq = msa.GetSeqWeight(uSeqIndex);\r
- SCORE scoreSeq = 0;\r
- for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)\r
- {\r
- const ProfPos &PP = Prof[uColIndex];\r
- if (msa.IsGap(uSeqIndex, uColIndex))\r
- {\r
- bool bOpen = (0 == uColIndex ||\r
- !msa.IsGap(uSeqIndex, uColIndex - 1));\r
- bool bClose = (uColCount - 1 == uColIndex ||\r
- !msa.IsGap(uSeqIndex, uColIndex + 1));\r
-\r
- if (bOpen)\r
- scoreSeq += PP.m_scoreGapOpen;\r
- if (bClose)\r
- scoreSeq += PP.m_scoreGapClose;\r
- //if (!bOpen && !bClose)\r
- // scoreSeq += PP.m_scoreGapExtend;\r
- }\r
- else if (msa.IsWildcard(uSeqIndex, uColIndex))\r
- continue;\r
- else\r
- {\r
- unsigned uLetter = msa.GetLetter(uSeqIndex, uColIndex);\r
- const SCORE scoreMatch = PP.m_AAScores[uLetter];\r
- if (0 != MatchScore)\r
- MatchScore[uColIndex] += weightSeq*scoreMatch;\r
- scoreSeq += scoreMatch;\r
- }\r
- }\r
- scoreTotal += weightSeq*scoreSeq;\r
- }\r
-\r
- delete[] Prof;\r
- return scoreTotal;\r
- }\r
-\r
-// The XP score is the sum of the score of each pair of\r
-// sequences between two profiles which are aligned to\r
-// each other. Notice that for two given profiles aligned\r
-// in different ways, the difference in XP score must be\r
-// the same as the difference in SP score because the\r
-// score of a pair of sequences in one profile doesn't\r
-// depend on the alignment.\r
-SCORE ObjScoreXP(const MSA &msa1, const MSA &msa2)\r
- {\r
- const unsigned uColCount1 = msa1.GetColCount();\r
- const unsigned uColCount2 = msa2.GetColCount();\r
- if (uColCount1 != uColCount2)\r
- Quit("ObjScoreXP, alignment lengths differ %u %u", uColCount1, uColCount2);\r
-\r
- const unsigned uSeqCount1 = msa1.GetSeqCount();\r
- const unsigned uSeqCount2 = msa2.GetSeqCount();\r
-\r
-#if TRACE\r
- Log(" Score Weight Weight Total\n");\r
- Log("---------- ------ ------ ----------\n");\r
-#endif\r
-\r
- SCORE scoreTotal = 0;\r
- unsigned uPairCount = 0;\r
- for (unsigned uSeqIndex1 = 0; uSeqIndex1 < uSeqCount1; ++uSeqIndex1)\r
- {\r
- const WEIGHT w1 = msa1.GetSeqWeight(uSeqIndex1);\r
- for (unsigned uSeqIndex2 = 0; uSeqIndex2 < uSeqCount2; ++uSeqIndex2)\r
- {\r
- const WEIGHT w2 = msa2.GetSeqWeight(uSeqIndex2);\r
- const WEIGHT w = w1*w2;\r
- SCORE scoreLetters = ScoreSeqPairLetters(msa1, uSeqIndex1, msa2, uSeqIndex2);\r
- SCORE scoreGaps = ScoreSeqPairGaps(msa1, uSeqIndex1, msa2, uSeqIndex2);\r
- SCORE scorePair = scoreLetters + scoreGaps;\r
- scoreTotal += w1*w2*scorePair;\r
- ++uPairCount;\r
-#if TRACE\r
- Log("%10.2f %6.3f %6.3f %10.2f >%s >%s\n",\r
- scorePair,\r
- w1,\r
- w2,\r
- scorePair*w1*w2,\r
- msa1.GetSeqName(uSeqIndex1),\r
- msa2.GetSeqName(uSeqIndex2));\r
-#endif\r
- }\r
- }\r
- if (0 == uPairCount)\r
- Quit("0 == uPairCount");\r
-\r
-#if TRACE\r
- Log("msa1=\n");\r
- msa1.LogMe();\r
- Log("msa2=\n");\r
- msa2.LogMe();\r
- Log("XP=%g\n", scoreTotal);\r
-#endif\r
-// return scoreTotal / uPairCount;\r
- return scoreTotal;\r
- }\r