+++ /dev/null
-#include "muscle.h"\r
-#include "profile.h"\r
-#include "diaglist.h"\r
-\r
-#define TRACE 0\r
-\r
-#define pow4(i) (1 << (2*i)) // 4^i = 2^(2*i)\r
-const unsigned K = 7;\r
-const unsigned KTUPS = pow4(K);\r
-static unsigned TuplePos[KTUPS];\r
-\r
-static char *TupleToStr(int t)\r
- {\r
- static char s[K];\r
-\r
- for (int i = 0; i < K; ++i)\r
- {\r
- unsigned Letter = (t/(pow4(i)))%4;\r
- assert(Letter >= 0 && Letter < 4);\r
- s[K-i-1] = LetterToChar(Letter);\r
- }\r
-\r
- return s;\r
- }\r
-\r
-static unsigned GetTuple(const ProfPos *PP, unsigned uPos)\r
- {\r
- unsigned t = 0;\r
-\r
- for (unsigned i = 0; i < K; ++i)\r
- {\r
- const unsigned uLetter = PP[uPos+i].m_uResidueGroup;\r
- if (RESIDUE_GROUP_MULTIPLE == uLetter)\r
- return EMPTY;\r
- t = t*4 + uLetter;\r
- }\r
-\r
- return t;\r
- }\r
-\r
-void FindDiagsNuc(const ProfPos *PX, unsigned uLengthX, const ProfPos *PY,\r
- unsigned uLengthY, DiagList &DL)\r
- {\r
- if (ALPHA_DNA != g_Alpha && ALPHA_RNA != g_Alpha)\r
- Quit("FindDiagsNuc: requires nucleo alphabet");\r
-\r
- DL.Clear();\r
-\r
-// 16 is arbitrary slop, no principled reason for this.\r
- if (uLengthX < K + 16 || uLengthY < K + 16)\r
- return;\r
-\r
-// Set A to shorter profile, B to longer\r
- const ProfPos *PA;\r
- const ProfPos *PB;\r
- unsigned uLengthA;\r
- unsigned uLengthB;\r
- bool bSwap;\r
- if (uLengthX < uLengthY)\r
- {\r
- bSwap = false;\r
- PA = PX;\r
- PB = PY;\r
- uLengthA = uLengthX;\r
- uLengthB = uLengthY;\r
- }\r
- else\r
- {\r
- bSwap = true;\r
- PA = PY;\r
- PB = PX;\r
- uLengthA = uLengthY;\r
- uLengthB = uLengthX;\r
- }\r
-\r
-#if TRACE\r
- Log("FindDiagsNuc(LengthA=%d LengthB=%d\n", uLengthA, uLengthB);\r
-#endif\r
-\r
-// Build tuple map for the longer profile, B\r
- if (uLengthB < K)\r
- Quit("FindDiags: profile too short");\r
-\r
- memset(TuplePos, EMPTY, sizeof(TuplePos));\r
-\r
- for (unsigned uPos = 0; uPos < uLengthB - K; ++uPos)\r
- {\r
- const unsigned uTuple = GetTuple(PB, uPos);\r
- if (EMPTY == uTuple)\r
- continue;\r
- TuplePos[uTuple] = uPos;\r
- }\r
-\r
-// Find matches\r
- for (unsigned uPosA = 0; uPosA < uLengthA - K; ++uPosA)\r
- {\r
- const unsigned uTuple = GetTuple(PA, uPosA);\r
- if (EMPTY == uTuple)\r
- continue;\r
- const unsigned uPosB = TuplePos[uTuple];\r
- if (EMPTY == uPosB)\r
- continue;\r
-\r
- // This tuple is found in both profiles\r
- unsigned uStartPosA = uPosA;\r
- unsigned uStartPosB = uPosB;\r
-\r
- // Try to extend the match forwards\r
- unsigned uEndPosA = uPosA + K - 1;\r
- unsigned uEndPosB = uPosB + K - 1;\r
- for (;;)\r
- {\r
- if (uLengthA - 1 == uEndPosA || uLengthB - 1 == uEndPosB)\r
- break;\r
- const unsigned uAAGroupA = PA[uEndPosA+1].m_uResidueGroup;\r
- if (RESIDUE_GROUP_MULTIPLE == uAAGroupA)\r
- break;\r
- const unsigned uAAGroupB = PB[uEndPosB+1].m_uResidueGroup;\r
- if (RESIDUE_GROUP_MULTIPLE == uAAGroupB)\r
- break;\r
- if (uAAGroupA != uAAGroupB)\r
- break;\r
- ++uEndPosA;\r
- ++uEndPosB;\r
- }\r
- uPosA = uEndPosA;\r
-\r
-#if TRACE\r
- {\r
- Log("Match: A %4u-%4u ", uStartPosA, uEndPosA);\r
- for (unsigned n = uStartPosA; n <= uEndPosA; ++n)\r
- Log("%c", LetterToChar(PA[n].m_uResidueGroup));\r
- Log("\n");\r
- Log(" B %4u-%4u ", uStartPosB, uEndPosB);\r
- for (unsigned n = uStartPosB; n <= uEndPosB; ++n)\r
- Log("%c", LetterToChar(PB[n].m_uResidueGroup));\r
- Log("\n");\r
- }\r
-#endif\r
-\r
- const unsigned uLength = uEndPosA - uStartPosA + 1;\r
- assert(uEndPosB - uStartPosB + 1 == uLength);\r
-\r
- if (uLength >= g_uMinDiagLength)\r
- {\r
- if (bSwap)\r
- DL.Add(uStartPosB, uStartPosA, uLength);\r
- else\r
- DL.Add(uStartPosA, uStartPosB, uLength);\r
- }\r
- }\r
- }\r