+++ /dev/null
-#include "muscle.h"\r
-#include "msa.h"\r
-\r
-/***\r
-Compute Henikoff weights.\r
-Steven Henikoff and Jorja G. Henikoff (1994), Position-based sequence weights.\r
-J. Mol. Biol., 243(4):574-578.\r
-\r
Award each different residue in a column an equal share of the weight, and then
divide that share equally among the sequences sharing the same residue. So if in a
position of a multiple alignment r different residues are represented, a residue
represented in only one sequence contributes a score of 1/r to that sequence, whereas
a residue represented in s sequences contributes a score of 1/(r*s) to each of the s
sequences. For each sequence, the contributions from each position are summed to give
a sequence weight.
-\r
-See also HenikoffWeightPB.\r
-***/\r
-\r
-void MSA::CalcHenikoffWeightsCol(unsigned uColIndex) const\r
- {\r
- const unsigned uSeqCount = GetSeqCount();\r
-\r
-// Compute letter counts in this column\r
- unsigned uLetterCount[MAX_ALPHA];\r
- memset(uLetterCount, 0, sizeof(uLetterCount));\r
- unsigned uDifferentLetterCount = 0;\r
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
- {\r
- unsigned uLetter = GetLetterEx(uSeqIndex, uColIndex);\r
- if (uLetter >= 20)\r
- continue;\r
- unsigned uNewCount = uLetterCount[uLetter] + 1;\r
- uLetterCount[uLetter] = uNewCount;\r
- if (1 == uNewCount)\r
- ++uDifferentLetterCount;\r
- }\r
-\r
-// Compute weight contributions\r
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
- {\r
- unsigned uLetter = GetLetterEx(uSeqIndex, uColIndex);\r
- if (uLetter >= 20)\r
- continue;\r
- const unsigned uCount = uLetterCount[uLetter];\r
- unsigned uDenom = uCount*uDifferentLetterCount;\r
- if (uDenom == 0)\r
- continue;\r
- m_Weights[uSeqIndex] += (WEIGHT) (1.0/uDenom);\r
- }\r
- }\r
-\r
-void MSA::SetHenikoffWeights() const\r
- {\r
- const unsigned uColCount = GetColCount();\r
- const unsigned uSeqCount = GetSeqCount();\r
-\r
- if (0 == uSeqCount)\r
- return;\r
- else if (1 == uSeqCount)\r
- {\r
- m_Weights[0] = (WEIGHT) 1.0;\r
- return;\r
- }\r
- else if (2 == uSeqCount)\r
- {\r
- m_Weights[0] = (WEIGHT) 0.5;\r
- m_Weights[1] = (WEIGHT) 0.5;\r
- return;\r
- }\r
-\r
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
- m_Weights[uSeqIndex] = 0.0;\r
-\r
- for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)\r
- CalcHenikoffWeightsCol(uColIndex);\r
-\r
-// Set all-gap seqs weight to 0\r
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
- if (IsGapSeq(uSeqIndex))\r
- m_Weights[uSeqIndex] = 0.0;\r
-\r
- Normalize(m_Weights, uSeqCount);\r
- }\r