+++ /dev/null
-#include "muscle.h"\r
-#include "msa.h"\r
-#include "profile.h"\r
-\r
-#define TRACE 0\r
-\r
-static void LogF(FCOUNT f)\r
- {\r
- if (f > -0.00001 && f < 0.00001)\r
- Log(" ");\r
- else\r
- Log(" %5.3f", f);\r
- }\r
-\r
-static const char *LocalScoreToStr(SCORE s)\r
- {\r
- static char str[16];\r
- if (s < -1e10 || s > 1e10)\r
- return " *";\r
- sprintf(str, "%5.1f", s);\r
- return str;\r
- }\r
-\r
-#if DOUBLE_AFFINE\r
-void ListProfile(const ProfPos *Prof, unsigned uLength, const MSA *ptrMSA)\r
- {\r
- Log(" Pos Occ LL LG GL GG Open Close Open2 Clos2\n");\r
- Log(" --- --- -- -- -- -- ---- ----- ----- -----\n");\r
- for (unsigned n = 0; n < uLength; ++n)\r
- {\r
- const ProfPos &PP = Prof[n];\r
- Log("%5u", n);\r
- LogF(PP.m_fOcc);\r
- LogF(PP.m_LL);\r
- LogF(PP.m_LG);\r
- LogF(PP.m_GL);\r
- LogF(PP.m_GG);\r
- Log(" %s", LocalScoreToStr(-PP.m_scoreGapOpen));\r
- Log(" %s", LocalScoreToStr(-PP.m_scoreGapClose));\r
- Log(" %s", LocalScoreToStr(-PP.m_scoreGapOpen2));\r
- Log(" %s", LocalScoreToStr(-PP.m_scoreGapClose2));\r
- if (0 != ptrMSA)\r
- {\r
- const unsigned uSeqCount = ptrMSA->GetSeqCount();\r
- Log(" ");\r
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
- Log("%c", ptrMSA->GetChar(uSeqIndex, n));\r
- }\r
- Log("\n");\r
- }\r
-\r
- Log("\n");\r
- Log(" Pos G");\r
- for (unsigned n = 0; n < g_AlphaSize; ++n)\r
- Log(" %c", LetterExToChar(n));\r
- Log("\n");\r
- Log(" --- -");\r
- for (unsigned n = 0; n < g_AlphaSize; ++n)\r
- Log(" -----");\r
- Log("\n");\r
-\r
- for (unsigned n = 0; n < uLength; ++n)\r
- {\r
- const ProfPos &PP = Prof[n];\r
- Log("%5u", n);\r
- if (-1 == PP.m_uResidueGroup)\r
- Log(" -", PP.m_uResidueGroup);\r
- else\r
- Log(" %d", PP.m_uResidueGroup);\r
-\r
- for (unsigned uLetter = 0; uLetter < g_AlphaSize; ++uLetter)\r
- {\r
- FCOUNT f = PP.m_fcCounts[uLetter];\r
- if (f == 0.0)\r
- Log(" ");\r
- else\r
- Log(" %5.3f", f);\r
- }\r
- if (0 != ptrMSA)\r
- {\r
- const unsigned uSeqCount = ptrMSA->GetSeqCount();\r
- Log(" ");\r
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
- Log("%c", ptrMSA->GetChar(uSeqIndex, n));\r
- }\r
- Log("\n");\r
- }\r
- }\r
-#endif // DOUBLE_AFFINE\r
-\r
-#if SINGLE_AFFINE\r
-void ListProfile(const ProfPos *Prof, unsigned uLength, const MSA *ptrMSA)\r
- {\r
- Log(" Pos Occ LL LG GL GG Open Close\n");\r
- Log(" --- --- -- -- -- -- ---- -----\n");\r
- for (unsigned n = 0; n < uLength; ++n)\r
- {\r
- const ProfPos &PP = Prof[n];\r
- Log("%5u", n);\r
- LogF(PP.m_fOcc);\r
- LogF(PP.m_LL);\r
- LogF(PP.m_LG);\r
- LogF(PP.m_GL);\r
- LogF(PP.m_GG);\r
- Log(" %5.1f", -PP.m_scoreGapOpen);\r
- Log(" %5.1f", -PP.m_scoreGapClose);\r
- if (0 != ptrMSA)\r
- {\r
- const unsigned uSeqCount = ptrMSA->GetSeqCount();\r
- Log(" ");\r
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
- Log("%c", ptrMSA->GetChar(uSeqIndex, n));\r
- }\r
- Log("\n");\r
- }\r
-\r
- Log("\n");\r
- Log(" Pos G");\r
- for (unsigned n = 0; n < g_AlphaSize; ++n)\r
- Log(" %c", LetterExToChar(n));\r
- Log("\n");\r
- Log(" --- -");\r
- for (unsigned n = 0; n < g_AlphaSize; ++n)\r
- Log(" -----");\r
- Log("\n");\r
-\r
- for (unsigned n = 0; n < uLength; ++n)\r
- {\r
- const ProfPos &PP = Prof[n];\r
- Log("%5u", n);\r
- if (-1 == PP.m_uResidueGroup)\r
- Log(" -", PP.m_uResidueGroup);\r
- else\r
- Log(" %d", PP.m_uResidueGroup);\r
-\r
- for (unsigned uLetter = 0; uLetter < g_AlphaSize; ++uLetter)\r
- {\r
- FCOUNT f = PP.m_fcCounts[uLetter];\r
- if (f == 0.0)\r
- Log(" ");\r
- else\r
- Log(" %5.3f", f);\r
- }\r
- if (0 != ptrMSA)\r
- {\r
- const unsigned uSeqCount = ptrMSA->GetSeqCount();\r
- Log(" ");\r
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
- Log("%c", ptrMSA->GetChar(uSeqIndex, n));\r
- }\r
- Log("\n");\r
- }\r
- }\r
-#endif\r
-\r
-void SortCounts(const FCOUNT fcCounts[], unsigned SortOrder[])\r
- {\r
- static unsigned InitialSortOrder[MAX_ALPHA] =\r
- {\r
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19\r
- };\r
- memcpy(SortOrder, InitialSortOrder, g_AlphaSize*sizeof(unsigned));\r
-\r
- bool bAny = true;\r
- while (bAny)\r
- {\r
- bAny = false;\r
- for (unsigned n = 0; n < g_AlphaSize - 1; ++n)\r
- {\r
- unsigned i1 = SortOrder[n];\r
- unsigned i2 = SortOrder[n+1];\r
- if (fcCounts[i1] < fcCounts[i2])\r
- {\r
- SortOrder[n+1] = i1;\r
- SortOrder[n] = i2;\r
- bAny = true;\r
- }\r
- }\r
- }\r
- }\r
-\r
-static unsigned AminoGroupFromFCounts(const FCOUNT fcCounts[])\r
- {\r
- bool bAny = false;\r
- unsigned uConsensusResidueGroup = RESIDUE_GROUP_MULTIPLE;\r
- for (unsigned uLetter = 0; uLetter < 20; ++uLetter)\r
- {\r
- if (0 == fcCounts[uLetter])\r
- continue;\r
- const unsigned uResidueGroup = ResidueGroup[uLetter];\r
- if (bAny)\r
- {\r
- if (uResidueGroup != uConsensusResidueGroup)\r
- return RESIDUE_GROUP_MULTIPLE;\r
- }\r
- else\r
- {\r
- bAny = true;\r
- uConsensusResidueGroup = uResidueGroup;\r
- }\r
- }\r
- return uConsensusResidueGroup;\r
- }\r
-\r
-static unsigned NucleoGroupFromFCounts(const FCOUNT fcCounts[])\r
- {\r
- bool bAny = false;\r
- unsigned uConsensusResidueGroup = RESIDUE_GROUP_MULTIPLE;\r
- for (unsigned uLetter = 0; uLetter < 4; ++uLetter)\r
- {\r
- if (0 == fcCounts[uLetter])\r
- continue;\r
- const unsigned uResidueGroup = uLetter;\r
- if (bAny)\r
- {\r
- if (uResidueGroup != uConsensusResidueGroup)\r
- return RESIDUE_GROUP_MULTIPLE;\r
- }\r
- else\r
- {\r
- bAny = true;\r
- uConsensusResidueGroup = uResidueGroup;\r
- }\r
- }\r
- return uConsensusResidueGroup;\r
- }\r
-\r
-unsigned ResidueGroupFromFCounts(const FCOUNT fcCounts[])\r
- {\r
- switch (g_Alpha)\r
- {\r
- case ALPHA_Amino:\r
- return AminoGroupFromFCounts(fcCounts);\r
-\r
- case ALPHA_DNA:\r
- case ALPHA_RNA:\r
- return NucleoGroupFromFCounts(fcCounts);\r
- }\r
- Quit("ResidueGroupFromFCounts: bad alpha");\r
- return 0;\r
- }\r
-\r
-ProfPos *ProfileFromMSA(const MSA &a)\r
- {\r
- const unsigned uSeqCount = a.GetSeqCount();\r
- const unsigned uColCount = a.GetColCount();\r
-\r
-// Yuck -- cast away const (inconsistent design here).\r
- SetMSAWeightsMuscle((MSA &) a);\r
-\r
- ProfPos *Pos = new ProfPos[uColCount];\r
-\r
- unsigned uHydrophobicRunLength = 0;\r
- for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)\r
- {\r
- ProfPos &PP = Pos[uColIndex];\r
-\r
- PP.m_bAllGaps = a.IsGapColumn(uColIndex);\r
-\r
- FCOUNT fcGapStart;\r
- FCOUNT fcGapEnd;\r
- FCOUNT fcGapExtend;\r
- FCOUNT fOcc;\r
- a.GetFractionalWeightedCounts(uColIndex, g_bNormalizeCounts, PP.m_fcCounts,\r
- &fcGapStart, &fcGapEnd, &fcGapExtend, &fOcc,\r
- &PP.m_LL, &PP.m_LG, &PP.m_GL, &PP.m_GG);\r
- PP.m_fOcc = fOcc;\r
-\r
- SortCounts(PP.m_fcCounts, PP.m_uSortOrder);\r
-\r
- PP.m_uResidueGroup = ResidueGroupFromFCounts(PP.m_fcCounts);\r
-\r
- for (unsigned i = 0; i < g_AlphaSize; ++i)\r
- {\r
- SCORE scoreSum = 0;\r
- for (unsigned j = 0; j < g_AlphaSize; ++j)\r
- scoreSum += PP.m_fcCounts[j]*(*g_ptrScoreMatrix)[i][j];\r
- PP.m_AAScores[i] = scoreSum;\r
- }\r
-\r
- SCORE sStartOcc = (SCORE) (1.0 - fcGapStart);\r
- SCORE sEndOcc = (SCORE) (1.0 - fcGapEnd);\r
-\r
- PP.m_fcStartOcc = sStartOcc;\r
- PP.m_fcEndOcc = sEndOcc;\r
-\r
- PP.m_scoreGapOpen = sStartOcc*g_scoreGapOpen/2;\r
- PP.m_scoreGapClose = sEndOcc*g_scoreGapOpen/2;\r
-#if DOUBLE_AFFINE\r
- PP.m_scoreGapOpen2 = sStartOcc*g_scoreGapOpen2/2;\r
- PP.m_scoreGapClose2 = sEndOcc*g_scoreGapOpen2/2;\r
-#endif\r
-// PP.m_scoreGapExtend = (SCORE) ((1.0 - fcGapExtend)*scoreGapExtend);\r
-\r
-#if PAF\r
- if (ALHPA_Amino == g_Alpha && sStartOcc > 0.5)\r
- {\r
- extern SCORE PAFactor(const FCOUNT fcCounts[]);\r
- SCORE paf = PAFactor(PP.m_fcCounts);\r
- PP.m_scoreGapOpen *= paf;\r
- PP.m_scoreGapClose *= paf;\r
- }\r
-#endif\r
- }\r
-\r
-#if HYDRO\r
- if (ALPHA_Amino == g_Alpha)\r
- Hydro(Pos, uColCount);\r
-#endif\r
-\r
-#if TRACE\r
- {\r
- Log("ProfileFromMSA\n");\r
- ListProfile(Pos, uColCount, &a);\r
- }\r
-#endif\r
- return Pos;\r
- }\r