2 Conservation value for a column in an MSA is defined as the number
\r
3 of times the most common letter appears divided by the number of
\r
11 double MSA::GetAvgCons() const
\r
13 assert(GetSeqCount() > 0);
\r
15 unsigned uNonGapColCount = 0;
\r
16 for (unsigned uColIndex = 0; uColIndex < GetColCount(); ++uColIndex)
\r
18 if (!IsGapColumn(uColIndex))
\r
20 dSum += GetCons(uColIndex);
\r
24 assert(uNonGapColCount > 0);
\r
25 double dAvg = dSum / uNonGapColCount;
\r
26 assert(dAvg > 0 && dAvg <= 1);
\r
30 double MSA::GetCons(unsigned uColIndex) const
\r
32 unsigned Counts[MAX_ALPHA];
\r
33 for (unsigned uLetter = 0; uLetter < g_AlphaSize; ++uLetter)
\r
34 Counts[uLetter] = 0;
\r
36 unsigned uMaxCount = 0;
\r
37 for (unsigned uSeqIndex = 0; uSeqIndex < GetSeqCount(); ++uSeqIndex)
\r
39 if (IsGap(uSeqIndex, uColIndex))
\r
41 char c = GetChar(uSeqIndex, uColIndex);
\r
43 if ('X' == c || 'B' == c || 'Z' == c)
\r
45 unsigned uLetter = GetLetter(uSeqIndex, uColIndex);
\r
46 unsigned uCount = Counts[uLetter] + 1;
\r
47 if (uCount > uMaxCount)
\r
49 Counts[uLetter] = uCount;
\r
52 // Cons is undefined for all-gap column
\r
59 double dCons = (double) uMaxCount / (double) GetSeqCount();
\r
60 assert(dCons > 0 && dCons <= 1);
\r
64 // Perecent identity of a pair of sequences.
\r
65 // Positions with one or both gapped are ignored.
\r
66 double MSA::GetPctIdentityPair(unsigned uSeqIndex1, unsigned uSeqIndex2) const
\r
68 const unsigned uColCount = GetColCount();
\r
69 unsigned uPosCount = 0;
\r
70 unsigned uSameCount = 0;
\r
71 for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
\r
73 const char c1 = GetChar(uSeqIndex1, uColIndex);
\r
74 const char c2 = GetChar(uSeqIndex2, uColIndex);
\r
75 if (IsGapChar(c1) || IsGapChar(c2))
\r
83 return (double) uSameCount / (double) uPosCount;
\r
86 // Perecent group identity of a pair of sequences.
\r
87 // Positions with one or both gapped are ignored.
\r
88 double MSA::GetPctGroupIdentityPair(unsigned uSeqIndex1,
\r
89 unsigned uSeqIndex2) const
\r
91 extern unsigned ResidueGroup[];
\r
93 const unsigned uColCount = GetColCount();
\r
94 unsigned uPosCount = 0;
\r
95 unsigned uSameCount = 0;
\r
96 for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
\r
98 if (IsGap(uSeqIndex1, uColIndex))
\r
100 if (IsGap(uSeqIndex2, uColIndex))
\r
102 if (IsWildcard(uSeqIndex1, uColIndex))
\r
104 if (IsWildcard(uSeqIndex2, uColIndex))
\r
107 const unsigned uLetter1 = GetLetter(uSeqIndex1, uColIndex);
\r
108 const unsigned uLetter2 = GetLetter(uSeqIndex2, uColIndex);
\r
109 const unsigned uGroup1 = ResidueGroup[uLetter1];
\r
110 const unsigned uGroup2 = ResidueGroup[uLetter2];
\r
111 if (uGroup1 == uGroup2)
\r
115 if (0 == uPosCount)
\r
117 return (double) uSameCount / (double) uPosCount;
\r