Mac binaries
[jabaws.git] / website / archive / binaries / mac / src / muscle / henikoffweight.cpp
diff --git a/website/archive/binaries/mac/src/muscle/henikoffweight.cpp b/website/archive/binaries/mac/src/muscle/henikoffweight.cpp
new file mode 100644 (file)
index 0000000..53f3b34
--- /dev/null
@@ -0,0 +1,84 @@
+#include "muscle.h"\r
+#include "msa.h"\r
+\r
+/***\r
+Compute Henikoff weights.\r
+Steven Henikoff and Jorja G. Henikoff (1994), Position-based sequence weights.\r
+J. Mol. Biol., 243(4):574-578.\r
+\r
+Award each different residue an equal share of the weight, and then divide up\r
+that weight equally among the sequences sharing the same residue. So if, in a\r
+position of a multiple alignment, r different residues are represented, a residue\r
+represented in only one sequence contributes a score of 1/r to that sequence, whereas a\r
+residue represented in s sequences contributes a score of 1/(r*s) to each of the s\r
+sequences. For each sequence, the contributions from each position are summed to give\r
+a sequence weight.\r
+\r
+See also HenikoffWeightPB.\r
+***/\r
+\r
+void MSA::CalcHenikoffWeightsCol(unsigned uColIndex) const\r
+       {\r
+       const unsigned uSeqCount = GetSeqCount();\r
+\r
+// Compute letter counts in this column\r
+       unsigned uLetterCount[MAX_ALPHA];\r
+       memset(uLetterCount, 0, sizeof(uLetterCount));\r
+       unsigned uDifferentLetterCount = 0;\r
+       for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
+               {\r
+               unsigned uLetter = GetLetterEx(uSeqIndex, uColIndex);\r
+               if (uLetter >= 20)\r
+                       continue;\r
+               unsigned uNewCount = uLetterCount[uLetter] + 1;\r
+               uLetterCount[uLetter] = uNewCount;\r
+               if (1 == uNewCount)\r
+                       ++uDifferentLetterCount;\r
+               }\r
+\r
+// Compute weight contributions\r
+       for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
+               {\r
+               unsigned uLetter = GetLetterEx(uSeqIndex, uColIndex);\r
+               if (uLetter >= 20)\r
+                       continue;\r
+               const unsigned uCount = uLetterCount[uLetter];\r
+               unsigned uDenom = uCount*uDifferentLetterCount;\r
+               if (uDenom == 0)\r
+                       continue;\r
+               m_Weights[uSeqIndex] += (WEIGHT) (1.0/uDenom);\r
+               }\r
+       }\r
+\r
+void MSA::SetHenikoffWeights() const\r
+       {\r
+       const unsigned uColCount = GetColCount();\r
+       const unsigned uSeqCount = GetSeqCount();\r
+\r
+       if (0 == uSeqCount)\r
+               return;\r
+       else if (1 == uSeqCount)\r
+               {\r
+               m_Weights[0] = (WEIGHT) 1.0;\r
+               return;\r
+               }\r
+       else if (2 == uSeqCount)\r
+               {\r
+               m_Weights[0] = (WEIGHT) 0.5;\r
+               m_Weights[1] = (WEIGHT) 0.5;\r
+               return;\r
+               }\r
+\r
+       for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
+               m_Weights[uSeqIndex] = 0.0;\r
+\r
+       for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)\r
+               CalcHenikoffWeightsCol(uColIndex);\r
+\r
+// Set all-gap seqs weight to 0\r
+       for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
+               if (IsGapSeq(uSeqIndex))\r
+                       m_Weights[uSeqIndex] = 0.0;\r
+\r
+       Normalize(m_Weights, uSeqCount);\r
+       }\r