7 static void LogF(FCOUNT f)
\r
9 if (f > -0.00001 && f < 0.00001)
\r
15 static const char *LocalScoreToStr(SCORE s)
\r
17 static char str[16];
\r
18 if (s < -1e10 || s > 1e10)
\r
20 sprintf(str, "%5.1f", s);
\r
25 void ListProfile(const ProfPos *Prof, unsigned uLength, const MSA *ptrMSA)
\r
27 Log(" Pos Occ LL LG GL GG Open Close Open2 Clos2\n");
\r
28 Log(" --- --- -- -- -- -- ---- ----- ----- -----\n");
\r
29 for (unsigned n = 0; n < uLength; ++n)
\r
31 const ProfPos &PP = Prof[n];
\r
38 Log(" %s", LocalScoreToStr(-PP.m_scoreGapOpen));
\r
39 Log(" %s", LocalScoreToStr(-PP.m_scoreGapClose));
\r
40 Log(" %s", LocalScoreToStr(-PP.m_scoreGapOpen2));
\r
41 Log(" %s", LocalScoreToStr(-PP.m_scoreGapClose2));
\r
44 const unsigned uSeqCount = ptrMSA->GetSeqCount();
\r
46 for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
\r
47 Log("%c", ptrMSA->GetChar(uSeqIndex, n));
\r
54 for (unsigned n = 0; n < g_AlphaSize; ++n)
\r
55 Log(" %c", LetterExToChar(n));
\r
58 for (unsigned n = 0; n < g_AlphaSize; ++n)
\r
62 for (unsigned n = 0; n < uLength; ++n)
\r
64 const ProfPos &PP = Prof[n];
\r
66 if (-1 == PP.m_uResidueGroup)
\r
67 Log(" -", PP.m_uResidueGroup);
\r
69 Log(" %d", PP.m_uResidueGroup);
\r
71 for (unsigned uLetter = 0; uLetter < g_AlphaSize; ++uLetter)
\r
73 FCOUNT f = PP.m_fcCounts[uLetter];
\r
81 const unsigned uSeqCount = ptrMSA->GetSeqCount();
\r
83 for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
\r
84 Log("%c", ptrMSA->GetChar(uSeqIndex, n));
\r
89 #endif // DOUBLE_AFFINE
\r
92 void ListProfile(const ProfPos *Prof, unsigned uLength, const MSA *ptrMSA)
\r
94 Log(" Pos Occ LL LG GL GG Open Close\n");
\r
95 Log(" --- --- -- -- -- -- ---- -----\n");
\r
96 for (unsigned n = 0; n < uLength; ++n)
\r
98 const ProfPos &PP = Prof[n];
\r
105 Log(" %5.1f", -PP.m_scoreGapOpen);
\r
106 Log(" %5.1f", -PP.m_scoreGapClose);
\r
109 const unsigned uSeqCount = ptrMSA->GetSeqCount();
\r
111 for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
\r
112 Log("%c", ptrMSA->GetChar(uSeqIndex, n));
\r
119 for (unsigned n = 0; n < g_AlphaSize; ++n)
\r
120 Log(" %c", LetterExToChar(n));
\r
123 for (unsigned n = 0; n < g_AlphaSize; ++n)
\r
127 for (unsigned n = 0; n < uLength; ++n)
\r
129 const ProfPos &PP = Prof[n];
\r
131 if (-1 == PP.m_uResidueGroup)
\r
132 Log(" -", PP.m_uResidueGroup);
\r
134 Log(" %d", PP.m_uResidueGroup);
\r
136 for (unsigned uLetter = 0; uLetter < g_AlphaSize; ++uLetter)
\r
138 FCOUNT f = PP.m_fcCounts[uLetter];
\r
146 const unsigned uSeqCount = ptrMSA->GetSeqCount();
\r
148 for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
\r
149 Log("%c", ptrMSA->GetChar(uSeqIndex, n));
\r
156 void SortCounts(const FCOUNT fcCounts[], unsigned SortOrder[])
\r
158 static unsigned InitialSortOrder[MAX_ALPHA] =
\r
160 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19
\r
162 memcpy(SortOrder, InitialSortOrder, g_AlphaSize*sizeof(unsigned));
\r
168 for (unsigned n = 0; n < g_AlphaSize - 1; ++n)
\r
170 unsigned i1 = SortOrder[n];
\r
171 unsigned i2 = SortOrder[n+1];
\r
172 if (fcCounts[i1] < fcCounts[i2])
\r
174 SortOrder[n+1] = i1;
\r
182 static unsigned AminoGroupFromFCounts(const FCOUNT fcCounts[])
\r
185 unsigned uConsensusResidueGroup = RESIDUE_GROUP_MULTIPLE;
\r
186 for (unsigned uLetter = 0; uLetter < 20; ++uLetter)
\r
188 if (0 == fcCounts[uLetter])
\r
190 const unsigned uResidueGroup = ResidueGroup[uLetter];
\r
193 if (uResidueGroup != uConsensusResidueGroup)
\r
194 return RESIDUE_GROUP_MULTIPLE;
\r
199 uConsensusResidueGroup = uResidueGroup;
\r
202 return uConsensusResidueGroup;
\r
205 static unsigned NucleoGroupFromFCounts(const FCOUNT fcCounts[])
\r
208 unsigned uConsensusResidueGroup = RESIDUE_GROUP_MULTIPLE;
\r
209 for (unsigned uLetter = 0; uLetter < 4; ++uLetter)
\r
211 if (0 == fcCounts[uLetter])
\r
213 const unsigned uResidueGroup = uLetter;
\r
216 if (uResidueGroup != uConsensusResidueGroup)
\r
217 return RESIDUE_GROUP_MULTIPLE;
\r
222 uConsensusResidueGroup = uResidueGroup;
\r
225 return uConsensusResidueGroup;
\r
228 unsigned ResidueGroupFromFCounts(const FCOUNT fcCounts[])
\r
233 return AminoGroupFromFCounts(fcCounts);
\r
237 return NucleoGroupFromFCounts(fcCounts);
\r
239 Quit("ResidueGroupFromFCounts: bad alpha");
\r
243 ProfPos *ProfileFromMSA(const MSA &a)
\r
245 const unsigned uSeqCount = a.GetSeqCount();
\r
246 const unsigned uColCount = a.GetColCount();
\r
248 // Yuck -- cast away const (inconsistent design here).
\r
249 SetMSAWeightsMuscle((MSA &) a);
\r
251 ProfPos *Pos = new ProfPos[uColCount];
\r
253 unsigned uHydrophobicRunLength = 0;
\r
254 for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
\r
256 ProfPos &PP = Pos[uColIndex];
\r
258 PP.m_bAllGaps = a.IsGapColumn(uColIndex);
\r
262 FCOUNT fcGapExtend;
\r
264 a.GetFractionalWeightedCounts(uColIndex, g_bNormalizeCounts, PP.m_fcCounts,
\r
265 &fcGapStart, &fcGapEnd, &fcGapExtend, &fOcc,
\r
266 &PP.m_LL, &PP.m_LG, &PP.m_GL, &PP.m_GG);
\r
269 SortCounts(PP.m_fcCounts, PP.m_uSortOrder);
\r
271 PP.m_uResidueGroup = ResidueGroupFromFCounts(PP.m_fcCounts);
\r
273 for (unsigned i = 0; i < g_AlphaSize; ++i)
\r
275 SCORE scoreSum = 0;
\r
276 for (unsigned j = 0; j < g_AlphaSize; ++j)
\r
277 scoreSum += PP.m_fcCounts[j]*(*g_ptrScoreMatrix)[i][j];
\r
278 PP.m_AAScores[i] = scoreSum;
\r
281 SCORE sStartOcc = (SCORE) (1.0 - fcGapStart);
\r
282 SCORE sEndOcc = (SCORE) (1.0 - fcGapEnd);
\r
284 PP.m_fcStartOcc = sStartOcc;
\r
285 PP.m_fcEndOcc = sEndOcc;
\r
287 PP.m_scoreGapOpen = sStartOcc*g_scoreGapOpen/2;
\r
288 PP.m_scoreGapClose = sEndOcc*g_scoreGapOpen/2;
\r
290 PP.m_scoreGapOpen2 = sStartOcc*g_scoreGapOpen2/2;
\r
291 PP.m_scoreGapClose2 = sEndOcc*g_scoreGapOpen2/2;
\r
293 // PP.m_scoreGapExtend = (SCORE) ((1.0 - fcGapExtend)*scoreGapExtend);
\r
296 if (ALHPA_Amino == g_Alpha && sStartOcc > 0.5)
\r
298 extern SCORE PAFactor(const FCOUNT fcCounts[]);
\r
299 SCORE paf = PAFactor(PP.m_fcCounts);
\r
300 PP.m_scoreGapOpen *= paf;
\r
301 PP.m_scoreGapClose *= paf;
\r
307 if (ALPHA_Amino == g_Alpha)
\r
308 Hydro(Pos, uColCount);
\r
313 Log("ProfileFromMSA\n");
\r
314 ListProfile(Pos, uColCount, &a);
\r