+++ /dev/null
-#include "muscle.h"\r
-#include "msa.h"\r
-#include "objscore.h"\r
-\r
-#define TRACE 0\r
-\r
-static void WindowSmooth(const SCORE Score[], unsigned uCount, unsigned uWindowLength,\r
- SCORE SmoothScore[], double dCeil)\r
- {\r
-#define Ceil(x) ((SCORE) ((x) > dCeil ? dCeil : (x)))\r
-\r
- if (1 != uWindowLength%2)\r
- Quit("WindowSmooth=%u must be odd", uWindowLength);\r
-\r
- if (uCount <= uWindowLength)\r
- {\r
- for (unsigned i = 0; i < uCount; ++i)\r
- SmoothScore[i] = 0;\r
- return;\r
- }\r
-\r
- const unsigned w2 = uWindowLength/2;\r
- for (unsigned i = 0; i < w2; ++i)\r
- {\r
- SmoothScore[i] = 0;\r
- SmoothScore[uCount - i - 1] = 0;\r
- }\r
-\r
- SCORE scoreWindowTotal = 0;\r
- for (unsigned i = 0; i < uWindowLength; ++i)\r
- {\r
- scoreWindowTotal += Ceil(Score[i]);\r
- }\r
-\r
- for (unsigned i = w2; ; ++i)\r
- {\r
- SmoothScore[i] = scoreWindowTotal/uWindowLength;\r
- if (i == uCount - w2 - 1)\r
- break;\r
-\r
- scoreWindowTotal -= Ceil(Score[i - w2]);\r
- scoreWindowTotal += Ceil(Score[i + w2 + 1]);\r
- }\r
-#undef Ceil\r
- }\r
-\r
-// Find columns that score above the given threshold.\r
-// A range of scores is defined between the average\r
-// and the maximum. The threshold is a fraction 0.0 .. 1.0\r
-// within that range, where 0.0 is the average score\r
-// and 1.0 is the maximum score.\r
-// "Grade" is by analogy with grading on a curve.\r
-static void FindBestColsGrade(const SCORE Score[], unsigned uCount,\r
- double dThreshold, unsigned BestCols[], unsigned *ptruBestColCount)\r
- {\r
- SCORE scoreTotal = 0;\r
- for (unsigned uIndex = 0; uIndex < uCount; ++uIndex)\r
- scoreTotal += Score[uIndex];\r
- const SCORE scoreAvg = scoreTotal / uCount;\r
-\r
- SCORE scoreMax = MINUS_INFINITY;\r
- for (unsigned uIndex = 0; uIndex < uCount; ++uIndex)\r
- if (Score[uIndex] > scoreMax)\r
- scoreMax = Score[uIndex];\r
-\r
- unsigned uBestColCount = 0;\r
- for (unsigned uIndex = 0; uIndex < uCount; ++uIndex)\r
- {\r
- const SCORE s = Score[uIndex];\r
- const double dHeight = (s - scoreAvg)/(scoreMax - scoreAvg);\r
- if (dHeight >= dThreshold)\r
- {\r
- BestCols[uBestColCount] = uIndex;\r
- ++uBestColCount;\r
- }\r
- }\r
- *ptruBestColCount = uBestColCount;\r
- }\r
-\r
-// Best col only if all following criteria satisfied:\r
-// (1) Score >= min\r
-// (2) Smoothed score >= min\r
-// (3) No gaps.\r
-static void FindBestColsCombo(const MSA &msa, const SCORE Score[],\r
- const SCORE SmoothScore[], double dMinScore, double dMinSmoothScore,\r
- unsigned BestCols[], unsigned *ptruBestColCount)\r
- {\r
- const unsigned uColCount = msa.GetColCount();\r
-\r
- unsigned uBestColCount = 0;\r
- for (unsigned uIndex = 0; uIndex < uColCount; ++uIndex)\r
- {\r
- if (Score[uIndex] < dMinScore)\r
- continue;\r
- if (SmoothScore[uIndex] < dMinSmoothScore)\r
- continue;\r
- if (msa.ColumnHasGap(uIndex))\r
- continue;\r
- BestCols[uBestColCount] = uIndex;\r
- ++uBestColCount;\r
- }\r
- *ptruBestColCount = uBestColCount;\r
- }\r
-\r
-static void ListBestCols(const MSA &msa, const SCORE Score[], const SCORE SmoothScore[],\r
- unsigned BestCols[], unsigned uBestColCount)\r
- {\r
- const unsigned uColCount = msa.GetColCount();\r
- const unsigned uSeqCount = msa.GetSeqCount();\r
-\r
- Log("Col ");\r
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
- Log("%u", uSeqIndex%10);\r
- Log(" ");\r
-\r
- for (unsigned uColIndex = 0; uColIndex < uColCount; ++uColIndex)\r
- {\r
- Log("%3u ", uColIndex);\r
- for (unsigned uSeqIndex = 0; uSeqIndex < uSeqCount; ++uSeqIndex)\r
- Log("%c", msa.GetChar(uSeqIndex, uColIndex));\r
-\r
- Log(" %10.3f", Score[uColIndex]);\r
- Log(" %10.3f", SmoothScore[uColIndex]);\r
-\r
- for (unsigned i = 0; i < uBestColCount; ++i)\r
- if (BestCols[i] == uColIndex)\r
- Log(" <-- Best");\r
- Log("\n");\r
- }\r
- }\r
-\r
-// If two best columns are found within a window, choose\r
-// the highest-scoring. If more than two, choose the one\r
-// closest to the center of the window.\r
-static void MergeBestCols(const SCORE Scores[], const unsigned BestCols[],\r
- unsigned uBestColCount, unsigned uWindowLength, unsigned AnchorCols[],\r
- unsigned *ptruAnchorColCount)\r
- {\r
- unsigned uAnchorColCount = 0;\r
- for (unsigned n = 0; n < uBestColCount; /* update inside loop */)\r
- {\r
- unsigned uBestColIndex = BestCols[n];\r
- unsigned uCountWithinWindow = 0;\r
- for (unsigned i = n + 1; i < uBestColCount; ++i)\r
- {\r
- unsigned uBestColIndex2 = BestCols[i];\r
- if (uBestColIndex2 - uBestColIndex >= uWindowLength)\r
- break;\r
- ++uCountWithinWindow;\r
- }\r
- unsigned uAnchorCol = uBestColIndex;\r
- if (1 == uCountWithinWindow)\r
- {\r
- unsigned uBestColIndex2 = BestCols[n+1];\r
- if (Scores[uBestColIndex] > Scores[uBestColIndex2])\r
- uAnchorCol = uBestColIndex;\r
- else\r
- uAnchorCol = uBestColIndex2;\r
- }\r
- else if (uCountWithinWindow > 1)\r
- {\r
- unsigned uWindowCenter = uBestColIndex + uWindowLength/2;\r
- int iClosestDist = uWindowLength;\r
- unsigned uClosestCol = uBestColIndex;\r
- for (unsigned i = n + 1; i < n + uCountWithinWindow; ++i)\r
- {\r
- unsigned uColIndex = BestCols[i];\r
- int iDist = uColIndex - uBestColIndex;\r
- if (iDist < 0)\r
- iDist = -iDist;\r
- if (iDist < iClosestDist)\r
- {\r
- uClosestCol = uColIndex;\r
- iClosestDist = iDist;\r
- }\r
- }\r
- uAnchorCol = uClosestCol;\r
- }\r
- AnchorCols[uAnchorColCount] = uAnchorCol;\r
- ++uAnchorColCount;\r
- n += uCountWithinWindow + 1;\r
- }\r
- *ptruAnchorColCount = uAnchorColCount;\r
- }\r
-\r
-void FindAnchorCols(const MSA &msa, unsigned AnchorCols[],\r
- unsigned *ptruAnchorColCount)\r
- {\r
- const unsigned uColCount = msa.GetColCount();\r
- if (uColCount < 16)\r
- {\r
- *ptruAnchorColCount = 0;\r
- return;\r
- }\r
-\r
- SCORE *MatchScore = new SCORE[uColCount];\r
- SCORE *SmoothScore = new SCORE[uColCount];\r
- unsigned *BestCols = new unsigned[uColCount];\r
-\r
- GetLetterScores(msa, MatchScore);\r
- WindowSmooth(MatchScore, uColCount, g_uSmoothWindowLength, SmoothScore,\r
- g_dSmoothScoreCeil);\r
-\r
- unsigned uBestColCount;\r
- FindBestColsCombo(msa, MatchScore, SmoothScore, g_dMinBestColScore, g_dMinSmoothScore,\r
- BestCols, &uBestColCount);\r
-\r
-#if TRACE\r
- ListBestCols(msa, MatchScore, SmoothScore, BestCols, uBestColCount);\r
-#endif\r
-\r
- MergeBestCols(MatchScore, BestCols, uBestColCount, g_uAnchorSpacing, AnchorCols,\r
- ptruAnchorColCount);\r
-\r
- delete[] MatchScore;\r
- delete[] SmoothScore;\r
- delete[] BestCols;\r
- }\r