Mac binaries
[jabaws.git] / website / archive / binaries / mac / src / muscle / cons.cpp
diff --git a/website/archive/binaries/mac/src/muscle/cons.cpp b/website/archive/binaries/mac/src/muscle/cons.cpp
new file mode 100644 (file)
index 0000000..c1507c5
--- /dev/null
@@ -0,0 +1,118 @@
+/***\r
+Conservation value for a column in an MSA is defined as the number\r
+of times the most common letter appears divided by the number of\r
+sequences.\r
+***/\r
+\r
+#include "muscle.h"\r
+#include "msa.h"\r
+#include <math.h>\r
+\r
+double MSA::GetAvgCons() const\r
+       {\r
+       assert(GetSeqCount() > 0);\r
+       double dSum = 0;\r
+       unsigned uNonGapColCount = 0;\r
+       for (unsigned uColIndex = 0; uColIndex < GetColCount(); ++uColIndex)\r
+               {\r
+               if (!IsGapColumn(uColIndex))\r
+                       {\r
+                       dSum += GetCons(uColIndex);\r
+                       ++uNonGapColCount;\r
+                       }\r
+               }\r
+       assert(uNonGapColCount > 0);\r
+       double dAvg = dSum / uNonGapColCount;\r
+       assert(dAvg > 0 && dAvg <= 1);\r
+       return dAvg;\r
+       }\r
+\r
+double MSA::GetCons(unsigned uColIndex) const\r
+       {\r
+       unsigned Counts[MAX_ALPHA];\r
+       for (unsigned uLetter = 0; uLetter < g_AlphaSize; ++uLetter)\r
+               Counts[uLetter] = 0;\r
+\r
+       unsigned uMaxCount = 0;\r
+       for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)\r
+               {\r
+               if (IsGap(uSeqIndex, uColIndex))\r
+                       continue;\r
+               char c = GetChar(uSeqIndex, uColIndex);\r
+               c = toupper(c);\r
+               if ('X' == c || 'B' == c || 'Z' == c)\r
+                       continue;\r
+               unsigned uLetter = GetLetter(uSeqIndex, uColIndex);\r
+               unsigned uCount = Counts[uLetter] + 1;\r
+               if (uCount > uMaxCount)\r
+                       uMaxCount = uCount;\r
+               Counts[uLetter] = uCount;\r
+               }\r
+\r
+// Cons is undefined for all-gap column\r
+       if (0 == uMaxCount)\r
+               {\r
+//             assert(false);\r
+               return 1;\r
+               }\r
+\r
+       double dCons = (double) uMaxCount / (double) GetSeqCount();\r
+       assert(dCons > 0 && dCons <= 1);\r
+       return dCons;\r
+       }\r
+\r
+// Perecent identity of a pair of sequences.\r
+// Positions with one or both gapped are ignored.\r
+double MSA::GetPctIdentityPair(unsigned uSeqIndex1, unsigned uSeqIndex2) const\r
+       {\r
+       const unsigned uColCount = GetColCount();\r
+       unsigned uPosCount = 0;\r
+       unsigned uSameCount = 0;\r
+       for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)\r
+               {\r
+               const char c1 = GetChar(uSeqIndex1, uColIndex);\r
+               const char c2 = GetChar(uSeqIndex2, uColIndex);\r
+               if (IsGapChar(c1) || IsGapChar(c2))\r
+                       continue;\r
+               if (c1 == c2)\r
+                       ++uSameCount;\r
+               ++uPosCount;\r
+               }\r
+       if (0 == uPosCount)\r
+               return 0;\r
+       return (double) uSameCount / (double) uPosCount;\r
+       }\r
+\r
+// Perecent group identity of a pair of sequences.\r
+// Positions with one or both gapped are ignored.\r
+double MSA::GetPctGroupIdentityPair(unsigned uSeqIndex1,\r
+  unsigned uSeqIndex2) const\r
+       {\r
+       extern unsigned ResidueGroup[];\r
+\r
+       const unsigned uColCount = GetColCount();\r
+       unsigned uPosCount = 0;\r
+       unsigned uSameCount = 0;\r
+       for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)\r
+               {\r
+               if (IsGap(uSeqIndex1, uColIndex))\r
+                       continue;\r
+               if (IsGap(uSeqIndex2, uColIndex))\r
+                       continue;\r
+               if (IsWildcard(uSeqIndex1, uColIndex))\r
+                       continue;\r
+               if (IsWildcard(uSeqIndex2, uColIndex))\r
+                       continue;\r
+\r
+               const unsigned uLetter1 = GetLetter(uSeqIndex1, uColIndex);\r
+               const unsigned uLetter2 = GetLetter(uSeqIndex2, uColIndex);\r
+               const unsigned uGroup1 = ResidueGroup[uLetter1];\r
+               const unsigned uGroup2 = ResidueGroup[uLetter2];\r
+               if (uGroup1 == uGroup2)\r
+                       ++uSameCount;\r
+               ++uPosCount;\r
+               }\r
+       if (0 == uPosCount)\r
+               return 0;\r
+       return (double) uSameCount / (double) uPosCount;\r
+       }\r