Next version of JABA
[jabaws.git] / binaries / src / muscle / anchors.cpp
1 #include "muscle.h"\r
2 #include "msa.h"\r
3 #include "objscore.h"\r
4 \r
5 #define TRACE   0\r
6 \r
7 static void WindowSmooth(const SCORE Score[], unsigned uCount, unsigned uWindowLength,\r
8   SCORE SmoothScore[], double dCeil)\r
9         {\r
10 #define Ceil(x) ((SCORE) ((x) > dCeil ? dCeil : (x)))\r
11 \r
12         if (1 != uWindowLength%2)\r
13                 Quit("WindowSmooth=%u must be odd", uWindowLength);\r
14 \r
15         if (uCount <= uWindowLength)\r
16                 {\r
17                 for (unsigned i = 0; i < uCount; ++i)\r
18                         SmoothScore[i] = 0;\r
19                 return;\r
20                 }\r
21 \r
22         const unsigned w2 = uWindowLength/2;\r
23         for (unsigned i = 0; i < w2; ++i)\r
24                 {\r
25                 SmoothScore[i] = 0;\r
26                 SmoothScore[uCount - i - 1] = 0;\r
27                 }\r
28 \r
29         SCORE scoreWindowTotal = 0;\r
30         for (unsigned i = 0; i < uWindowLength; ++i)\r
31                 {\r
32                 scoreWindowTotal += Ceil(Score[i]);\r
33                 }\r
34 \r
35         for (unsigned i = w2; ; ++i)\r
36                 {\r
37                 SmoothScore[i] = scoreWindowTotal/uWindowLength;\r
38                 if (i == uCount - w2 - 1)\r
39                         break;\r
40 \r
41                 scoreWindowTotal -= Ceil(Score[i - w2]);\r
42                 scoreWindowTotal += Ceil(Score[i + w2 + 1]);\r
43                 }\r
44 #undef Ceil\r
45         }\r
46 \r
47 // Find columns that score above the given threshold.\r
48 // A range of scores is defined between the average\r
49 // and the maximum. The threshold is a fraction 0.0 .. 1.0\r
50 // within that range, where 0.0 is the average score\r
51 // and 1.0 is the maximum score.\r
52 // "Grade" is by analogy with grading on a curve.\r
53 static void FindBestColsGrade(const SCORE Score[], unsigned uCount,\r
54   double dThreshold, unsigned BestCols[], unsigned *ptruBestColCount)\r
55         {\r
56         SCORE scoreTotal = 0;\r
57         for (unsigned uIndex = 0; uIndex < uCount; ++uIndex)\r
58                 scoreTotal += Score[uIndex];\r
59         const SCORE scoreAvg = scoreTotal / uCount;\r
60 \r
61         SCORE scoreMax = MINUS_INFINITY;\r
62         for (unsigned uIndex = 0; uIndex < uCount; ++uIndex)\r
63                 if (Score[uIndex] > scoreMax)\r
64                         scoreMax = Score[uIndex];\r
65 \r
66         unsigned uBestColCount = 0;\r
67         for (unsigned uIndex = 0; uIndex < uCount; ++uIndex)\r
68                 {\r
69                 const SCORE s = Score[uIndex];\r
70                 const double dHeight = (s - scoreAvg)/(scoreMax - scoreAvg);\r
71                 if (dHeight >= dThreshold)\r
72                         {\r
73                         BestCols[uBestColCount] = uIndex;\r
74                         ++uBestColCount;\r
75                         }\r
76                 }\r
77         *ptruBestColCount = uBestColCount;\r
78         }\r
79 \r
80 // Best col only if all following criteria satisfied:\r
81 // (1) Score >= min\r
82 // (2) Smoothed score >= min\r
83 // (3) No gaps.\r
84 static void FindBestColsCombo(const MSA &msa, const SCORE Score[],\r
85   const SCORE SmoothScore[], double dMinScore, double dMinSmoothScore,\r
86   unsigned BestCols[], unsigned *ptruBestColCount)\r
87         {\r
88         const unsigned uColCount = msa.GetColCount();\r
89 \r
90         unsigned uBestColCount = 0;\r
91         for (unsigned uIndex = 0; uIndex < uColCount; ++uIndex)\r
92                 {\r
93                 if (Score[uIndex] < dMinScore)\r
94                         continue;\r
95                 if (SmoothScore[uIndex] < dMinSmoothScore)\r
96                         continue;\r
97                 if (msa.ColumnHasGap(uIndex))\r
98                         continue;\r
99                 BestCols[uBestColCount] = uIndex;\r
100                 ++uBestColCount;\r
101                 }\r
102         *ptruBestColCount = uBestColCount;\r
103         }\r
104 \r
105 static void ListBestCols(const MSA &msa, const SCORE Score[], const SCORE SmoothScore[],\r
106   unsigned BestCols[], unsigned uBestColCount)\r
107         {\r
108         const unsigned uColCount = msa.GetColCount();\r
109         const unsigned uSeqCount = msa.GetSeqCount();\r
110 \r
111         Log("Col  ");\r
112         for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
113                 Log("%u", uSeqIndex%10);\r
114         Log("  ");\r
115 \r
116         for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)\r
117                 {\r
118                 Log("%3u  ", uColIndex);\r
119                 for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
120                         Log("%c", msa.GetChar(uSeqIndex, uColIndex));\r
121 \r
122                 Log("  %10.3f", Score[uColIndex]);\r
123                 Log("  %10.3f", SmoothScore[uColIndex]);\r
124 \r
125                 for (unsigned i = 0; i < uBestColCount; ++i)\r
126                         if (BestCols[i] == uColIndex)\r
127                                 Log(" <-- Best");\r
128                 Log("\n");\r
129                 }\r
130         }\r
131 \r
132 // If two best columns are found within a window, choose\r
133 // the highest-scoring. If more than two, choose the one\r
134 // closest to the center of the window.\r
135 static void MergeBestCols(const SCORE Scores[], const unsigned BestCols[],\r
136   unsigned uBestColCount, unsigned uWindowLength, unsigned AnchorCols[],\r
137   unsigned *ptruAnchorColCount)\r
138         {\r
139         unsigned uAnchorColCount = 0;\r
140         for (unsigned n = 0; n < uBestColCount; /* update inside loop */)\r
141                 {\r
142                 unsigned uBestColIndex = BestCols[n];\r
143                 unsigned uCountWithinWindow = 0;\r
144                 for (unsigned i = n + 1; i < uBestColCount; ++i)\r
145                         {\r
146                         unsigned uBestColIndex2 = BestCols[i];\r
147                         if (uBestColIndex2 - uBestColIndex >= uWindowLength)\r
148                                 break;\r
149                         ++uCountWithinWindow;\r
150                         }\r
151                 unsigned uAnchorCol = uBestColIndex;\r
152                 if (1 == uCountWithinWindow)\r
153                         {\r
154                         unsigned uBestColIndex2 = BestCols[n+1];\r
155                         if (Scores[uBestColIndex] > Scores[uBestColIndex2])\r
156                                 uAnchorCol = uBestColIndex;\r
157                         else\r
158                                 uAnchorCol = uBestColIndex2;\r
159                         }\r
160                 else if (uCountWithinWindow > 1)\r
161                         {\r
162                         unsigned uWindowCenter = uBestColIndex + uWindowLength/2;\r
163                         int iClosestDist = uWindowLength;\r
164                         unsigned uClosestCol = uBestColIndex;\r
165                         for (unsigned i = n + 1; i < n + uCountWithinWindow; ++i)\r
166                                 {\r
167                                 unsigned uColIndex = BestCols[i];\r
168                                 int iDist = uColIndex - uBestColIndex;\r
169                                 if (iDist < 0)\r
170                                         iDist = -iDist;\r
171                                 if (iDist < iClosestDist)\r
172                                         {\r
173                                         uClosestCol = uColIndex;\r
174                                         iClosestDist = iDist;\r
175                                         }\r
176                                 }\r
177                         uAnchorCol = uClosestCol;\r
178                         }\r
179                 AnchorCols[uAnchorColCount] = uAnchorCol;\r
180                 ++uAnchorColCount;\r
181                 n += uCountWithinWindow + 1;\r
182                 }\r
183         *ptruAnchorColCount = uAnchorColCount;\r
184         }\r
185 \r
186 void FindAnchorCols(const MSA &msa, unsigned AnchorCols[],\r
187   unsigned *ptruAnchorColCount)\r
188         {\r
189         const unsigned uColCount = msa.GetColCount();\r
190         if (uColCount < 16)\r
191                 {\r
192                 *ptruAnchorColCount = 0;\r
193                 return;\r
194                 }\r
195 \r
196         SCORE *MatchScore = new SCORE[uColCount];\r
197         SCORE *SmoothScore = new SCORE[uColCount];\r
198         unsigned *BestCols = new unsigned[uColCount];\r
199 \r
200         GetLetterScores(msa, MatchScore);\r
201         WindowSmooth(MatchScore, uColCount, g_uSmoothWindowLength, SmoothScore,\r
202           g_dSmoothScoreCeil);\r
203 \r
204         unsigned uBestColCount;\r
205         FindBestColsCombo(msa, MatchScore, SmoothScore, g_dMinBestColScore, g_dMinSmoothScore,\r
206           BestCols, &uBestColCount);\r
207 \r
208 #if     TRACE\r
209         ListBestCols(msa, MatchScore, SmoothScore, BestCols, uBestColCount);\r
210 #endif\r
211 \r
212         MergeBestCols(MatchScore, BestCols, uBestColCount, g_uAnchorSpacing, AnchorCols,\r
213           ptruAnchorColCount);\r
214 \r
215         delete[] MatchScore;\r
216         delete[] SmoothScore;\r
217         delete[] BestCols;\r
218         }\r