3 #include "objscore.h"
\r
7 static void WindowSmooth(const SCORE Score[], unsigned uCount, unsigned uWindowLength,
\r
8 SCORE SmoothScore[], double dCeil)
\r
10 #define Ceil(x) ((SCORE) ((x) > dCeil ? dCeil : (x)))
\r
12 if (1 != uWindowLength%2)
\r
13 Quit("WindowSmooth=%u must be odd", uWindowLength);
\r
15 if (uCount <= uWindowLength)
\r
17 for (unsigned i = 0; i < uCount; ++i)
\r
22 const unsigned w2 = uWindowLength/2;
\r
23 for (unsigned i = 0; i < w2; ++i)
\r
26 SmoothScore[uCount - i - 1] = 0;
\r
29 SCORE scoreWindowTotal = 0;
\r
30 for (unsigned i = 0; i < uWindowLength; ++i)
\r
32 scoreWindowTotal += Ceil(Score[i]);
\r
35 for (unsigned i = w2; ; ++i)
\r
37 SmoothScore[i] = scoreWindowTotal/uWindowLength;
\r
38 if (i == uCount - w2 - 1)
\r
41 scoreWindowTotal -= Ceil(Score[i - w2]);
\r
42 scoreWindowTotal += Ceil(Score[i + w2 + 1]);
\r
47 // Find columns that score above the given threshold.
\r
48 // A range of scores is defined between the average
\r
49 // and the maximum. The threshold is a fraction 0.0 .. 1.0
\r
50 // within that range, where 0.0 is the average score
\r
51 // and 1.0 is the maximum score.
\r
52 // "Grade" is by analogy with grading on a curve.
\r
53 static void FindBestColsGrade(const SCORE Score[], unsigned uCount,
\r
54 double dThreshold, unsigned BestCols[], unsigned *ptruBestColCount)
\r
56 SCORE scoreTotal = 0;
\r
57 for (unsigned uIndex = 0; uIndex < uCount; ++uIndex)
\r
58 scoreTotal += Score[uIndex];
\r
59 const SCORE scoreAvg = scoreTotal / uCount;
\r
61 SCORE scoreMax = MINUS_INFINITY;
\r
62 for (unsigned uIndex = 0; uIndex < uCount; ++uIndex)
\r
63 if (Score[uIndex] > scoreMax)
\r
64 scoreMax = Score[uIndex];
\r
66 unsigned uBestColCount = 0;
\r
67 for (unsigned uIndex = 0; uIndex < uCount; ++uIndex)
\r
69 const SCORE s = Score[uIndex];
\r
70 const double dHeight = (s - scoreAvg)/(scoreMax - scoreAvg);
\r
71 if (dHeight >= dThreshold)
\r
73 BestCols[uBestColCount] = uIndex;
\r
77 *ptruBestColCount = uBestColCount;
\r
80 // Best col only if all following criteria satisfied:
\r
82 // (2) Smoothed score >= min
\r
84 static void FindBestColsCombo(const MSA &msa, const SCORE Score[],
\r
85 const SCORE SmoothScore[], double dMinScore, double dMinSmoothScore,
\r
86 unsigned BestCols[], unsigned *ptruBestColCount)
\r
88 const unsigned uColCount = msa.GetColCount();
\r
90 unsigned uBestColCount = 0;
\r
91 for (unsigned uIndex = 0; uIndex < uColCount; ++uIndex)
\r
93 if (Score[uIndex] < dMinScore)
\r
95 if (SmoothScore[uIndex] < dMinSmoothScore)
\r
97 if (msa.ColumnHasGap(uIndex))
\r
99 BestCols[uBestColCount] = uIndex;
\r
102 *ptruBestColCount = uBestColCount;
\r
105 static void ListBestCols(const MSA &msa, const SCORE Score[], const SCORE SmoothScore[],
\r
106 unsigned BestCols[], unsigned uBestColCount)
\r
108 const unsigned uColCount = msa.GetColCount();
\r
109 const unsigned uSeqCount = msa.GetSeqCount();
\r
112 for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
\r
113 Log("%u", uSeqIndex%10);
\r
116 for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)
\r
118 Log("%3u ", uColIndex);
\r
119 for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)
\r
120 Log("%c", msa.GetChar(uSeqIndex, uColIndex));
\r
122 Log(" %10.3f", Score[uColIndex]);
\r
123 Log(" %10.3f", SmoothScore[uColIndex]);
\r
125 for (unsigned i = 0; i < uBestColCount; ++i)
\r
126 if (BestCols[i] == uColIndex)
\r
132 // If two best columns are found within a window, choose
\r
133 // the highest-scoring. If more than two, choose the one
\r
134 // closest to the center of the window.
\r
135 static void MergeBestCols(const SCORE Scores[], const unsigned BestCols[],
\r
136 unsigned uBestColCount, unsigned uWindowLength, unsigned AnchorCols[],
\r
137 unsigned *ptruAnchorColCount)
\r
139 unsigned uAnchorColCount = 0;
\r
140 for (unsigned n = 0; n < uBestColCount; /* update inside loop */)
\r
142 unsigned uBestColIndex = BestCols[n];
\r
143 unsigned uCountWithinWindow = 0;
\r
144 for (unsigned i = n + 1; i < uBestColCount; ++i)
\r
146 unsigned uBestColIndex2 = BestCols[i];
\r
147 if (uBestColIndex2 - uBestColIndex >= uWindowLength)
\r
149 ++uCountWithinWindow;
\r
151 unsigned uAnchorCol = uBestColIndex;
\r
152 if (1 == uCountWithinWindow)
\r
154 unsigned uBestColIndex2 = BestCols[n+1];
\r
155 if (Scores[uBestColIndex] > Scores[uBestColIndex2])
\r
156 uAnchorCol = uBestColIndex;
\r
158 uAnchorCol = uBestColIndex2;
\r
160 else if (uCountWithinWindow > 1)
\r
162 unsigned uWindowCenter = uBestColIndex + uWindowLength/2;
\r
163 int iClosestDist = uWindowLength;
\r
164 unsigned uClosestCol = uBestColIndex;
\r
165 for (unsigned i = n + 1; i < n + uCountWithinWindow; ++i)
\r
167 unsigned uColIndex = BestCols[i];
\r
168 int iDist = uColIndex - uBestColIndex;
\r
171 if (iDist < iClosestDist)
\r
173 uClosestCol = uColIndex;
\r
174 iClosestDist = iDist;
\r
177 uAnchorCol = uClosestCol;
\r
179 AnchorCols[uAnchorColCount] = uAnchorCol;
\r
181 n += uCountWithinWindow + 1;
\r
183 *ptruAnchorColCount = uAnchorColCount;
\r
186 void FindAnchorCols(const MSA &msa, unsigned AnchorCols[],
\r
187 unsigned *ptruAnchorColCount)
\r
189 const unsigned uColCount = msa.GetColCount();
\r
190 if (uColCount < 16)
\r
192 *ptruAnchorColCount = 0;
\r
196 SCORE *MatchScore = new SCORE[uColCount];
\r
197 SCORE *SmoothScore = new SCORE[uColCount];
\r
198 unsigned *BestCols = new unsigned[uColCount];
\r
200 GetLetterScores(msa, MatchScore);
\r
201 WindowSmooth(MatchScore, uColCount, g_uSmoothWindowLength, SmoothScore,
\r
202 g_dSmoothScoreCeil);
\r
204 unsigned uBestColCount;
\r
205 FindBestColsCombo(msa, MatchScore, SmoothScore, g_dMinBestColScore, g_dMinSmoothScore,
\r
206 BestCols, &uBestColCount);
\r
209 ListBestCols(msa, MatchScore, SmoothScore, BestCols, uBestColCount);
\r
212 MergeBestCols(MatchScore, BestCols, uBestColCount, g_uAnchorSpacing, AnchorCols,
\r
213 ptruAnchorColCount);
\r
215 delete[] MatchScore;
\r
216 delete[] SmoothScore;
\r