5 Compute Henikoff weights.
\r
6 Steven Henikoff and Jorja G. Henikoff (1994), Position-based sequence weights.
\r
7 J. Mol. Biol., 243(4):574-578.
\r
9 Award each different residue an equal share of the weight, and then to divide up
\r
10 that weight equally among the sequences sharing the same residue. So if in a
\r
11 position of a multiple alignment, r different residues are represented, a residue
\r
12 represented in only one sequence contributes a score of 1/r to that sequence, whereas a
\r
13 residue represented in s sequences contributes a score of 1/rs to each of the s
\r
14 sequences. For each sequence, the contributions from each position are summed to give
\r
17 See also HenikoffWeightPB.
\r
20 void MSA::CalcHenikoffWeightsCol(unsigned uColIndex) const
\r
22 const unsigned uSeqCount = GetSeqCount();
\r
24 // Compute letter counts in this column
\r
25 unsigned uLetterCount[MAX_ALPHA];
\r
26 memset(uLetterCount, 0, sizeof(uLetterCount));
\r
27 unsigned uDifferentLetterCount = 0;
\r
28 for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
\r
30 unsigned uLetter = GetLetterEx(uSeqIndex, uColIndex);
\r
33 unsigned uNewCount = uLetterCount[uLetter] + 1;
\r
34 uLetterCount[uLetter] = uNewCount;
\r
36 ++uDifferentLetterCount;
\r
39 // Compute weight contributions
\r
40 for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
\r
42 unsigned uLetter = GetLetterEx(uSeqIndex, uColIndex);
\r
45 const unsigned uCount = uLetterCount[uLetter];
\r
46 unsigned uDenom = uCount*uDifferentLetterCount;
\r
49 m_Weights[uSeqIndex] += (WEIGHT) (1.0/uDenom);
\r
53 void MSA::SetHenikoffWeights() const
\r
55 const unsigned uColCount = GetColCount();
\r
56 const unsigned uSeqCount = GetSeqCount();
\r
60 else if (1 == uSeqCount)
\r
62 m_Weights[0] = (WEIGHT) 1.0;
\r
65 else if (2 == uSeqCount)
\r
67 m_Weights[0] = (WEIGHT) 0.5;
\r
68 m_Weights[1] = (WEIGHT) 0.5;
\r
72 for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
\r
73 m_Weights[uSeqIndex] = 0.0;
\r
75 for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
\r
76 CalcHenikoffWeightsCol(uColIndex);
\r
78 // Set all-gap seqs weight to 0
\r
79 for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
\r
80 if (IsGapSeq(uSeqIndex))
\r
81 m_Weights[uSeqIndex] = 0.0;
\r
83 Normalize(m_Weights, uSeqCount);
\r