--- /dev/null
+#include "muscle.h"\r
+#include "profile.h"\r
+#include "diaglist.h"\r
+\r
+#define TRACE 0\r
+\r
+#define pow4(i) (1 << (2*i)) // 4^i = 2^(2*i)\r
+const unsigned K = 7;\r
+const unsigned KTUPS = pow4(K);\r
+static unsigned TuplePos[KTUPS];\r
+\r
+static char *TupleToStr(int t)\r
+ {\r
+ static char s[K];\r
+\r
+ for (int i = 0; i < K; ++i)\r
+ {\r
+ unsigned Letter = (t/(pow4(i)))%4;\r
+ assert(Letter >= 0 && Letter < 4);\r
+ s[K-i-1] = LetterToChar(Letter);\r
+ }\r
+\r
+ return s;\r
+ }\r
+\r
+static unsigned GetTuple(const ProfPos *PP, unsigned uPos)\r
+ {\r
+ unsigned t = 0;\r
+\r
+ for (unsigned i = 0; i < K; ++i)\r
+ {\r
+ const unsigned uLetter = PP[uPos+i].m_uResidueGroup;\r
+ if (RESIDUE_GROUP_MULTIPLE == uLetter)\r
+ return EMPTY;\r
+ t = t*4 + uLetter;\r
+ }\r
+\r
+ return t;\r
+ }\r
+\r
+void FindDiagsNuc(const ProfPos *PX, unsigned uLengthX, const ProfPos *PY,\r
+ unsigned uLengthY, DiagList &DL)\r
+ {\r
+ if (ALPHA_DNA != g_Alpha && ALPHA_RNA != g_Alpha)\r
+ Quit("FindDiagsNuc: requires nucleo alphabet");\r
+\r
+ DL.Clear();\r
+\r
+// 16 is arbitrary slop, no principled reason for this.\r
+ if (uLengthX < K + 16 || uLengthY < K + 16)\r
+ return;\r
+\r
+// Set A to shorter profile, B to longer\r
+ const ProfPos *PA;\r
+ const ProfPos *PB;\r
+ unsigned uLengthA;\r
+ unsigned uLengthB;\r
+ bool bSwap;\r
+ if (uLengthX < uLengthY)\r
+ {\r
+ bSwap = false;\r
+ PA = PX;\r
+ PB = PY;\r
+ uLengthA = uLengthX;\r
+ uLengthB = uLengthY;\r
+ }\r
+ else\r
+ {\r
+ bSwap = true;\r
+ PA = PY;\r
+ PB = PX;\r
+ uLengthA = uLengthY;\r
+ uLengthB = uLengthX;\r
+ }\r
+\r
+#if TRACE\r
+ Log("FindDiagsNuc(LengthA=%d LengthB=%d\n", uLengthA, uLengthB);\r
+#endif\r
+\r
+// Build tuple map for the longer profile, B\r
+ if (uLengthB < K)\r
+ Quit("FindDiags: profile too short");\r
+\r
+ memset(TuplePos, EMPTY, sizeof(TuplePos));\r
+\r
+ for (unsigned uPos = 0; uPos < uLengthB - K; ++uPos)\r
+ {\r
+ const unsigned uTuple = GetTuple(PB, uPos);\r
+ if (EMPTY == uTuple)\r
+ continue;\r
+ TuplePos[uTuple] = uPos;\r
+ }\r
+\r
+// Find matches\r
+ for (unsigned uPosA = 0; uPosA < uLengthA - K; ++uPosA)\r
+ {\r
+ const unsigned uTuple = GetTuple(PA, uPosA);\r
+ if (EMPTY == uTuple)\r
+ continue;\r
+ const unsigned uPosB = TuplePos[uTuple];\r
+ if (EMPTY == uPosB)\r
+ continue;\r
+\r
+ // This tuple is found in both profiles\r
+ unsigned uStartPosA = uPosA;\r
+ unsigned uStartPosB = uPosB;\r
+\r
+ // Try to extend the match forwards\r
+ unsigned uEndPosA = uPosA + K - 1;\r
+ unsigned uEndPosB = uPosB + K - 1;\r
+ for (;;)\r
+ {\r
+ if (uLengthA - 1 == uEndPosA || uLengthB - 1 == uEndPosB)\r
+ break;\r
+ const unsigned uAAGroupA = PA[uEndPosA+1].m_uResidueGroup;\r
+ if (RESIDUE_GROUP_MULTIPLE == uAAGroupA)\r
+ break;\r
+ const unsigned uAAGroupB = PB[uEndPosB+1].m_uResidueGroup;\r
+ if (RESIDUE_GROUP_MULTIPLE == uAAGroupB)\r
+ break;\r
+ if (uAAGroupA != uAAGroupB)\r
+ break;\r
+ ++uEndPosA;\r
+ ++uEndPosB;\r
+ }\r
+ uPosA = uEndPosA;\r
+\r
+#if TRACE\r
+ {\r
+ Log("Match: A %4u-%4u ", uStartPosA, uEndPosA);\r
+ for (unsigned n = uStartPosA; n <= uEndPosA; ++n)\r
+ Log("%c", LetterToChar(PA[n].m_uResidueGroup));\r
+ Log("\n");\r
+ Log(" B %4u-%4u ", uStartPosB, uEndPosB);\r
+ for (unsigned n = uStartPosB; n <= uEndPosB; ++n)\r
+ Log("%c", LetterToChar(PB[n].m_uResidueGroup));\r
+ Log("\n");\r
+ }\r
+#endif\r
+\r
+ const unsigned uLength = uEndPosA - uStartPosA + 1;\r
+ assert(uEndPosB - uStartPosB + 1 == uLength);\r
+\r
+ if (uLength >= g_uMinDiagLength)\r
+ {\r
+ if (bSwap)\r
+ DL.Add(uStartPosB, uStartPosA, uLength);\r
+ else\r
+ DL.Add(uStartPosA, uStartPosB, uLength);\r
+ }\r
+ }\r
+ }\r