Mac binaries
[jabaws.git] / website / archive / binaries / mac / src / muscle / finddiagsn.cpp
diff --git a/website/archive/binaries/mac/src/muscle/finddiagsn.cpp b/website/archive/binaries/mac/src/muscle/finddiagsn.cpp
new file mode 100644 (file)
index 0000000..58a2fc3
--- /dev/null
@@ -0,0 +1,152 @@
+#include "muscle.h"\r
+#include "profile.h"\r
+#include "diaglist.h"\r
+\r
+#define TRACE  0\r
+\r
+#define pow4(i)        (1 << (2*i))    // 4^i = 2^(2*i)\r
+const unsigned K = 7;\r
+const unsigned KTUPS = pow4(K);\r
+static unsigned TuplePos[KTUPS];\r
+\r
+static char *TupleToStr(int t)\r
+       {\r
+       static char s[K];\r
+\r
+       for (int i = 0; i < K; ++i)\r
+               {\r
+               unsigned Letter = (t/(pow4(i)))%4;\r
+               assert(Letter >= 0 && Letter < 4);\r
+               s[K-i-1] = LetterToChar(Letter);\r
+               }\r
+\r
+       return s;\r
+       }\r
+\r
+static unsigned GetTuple(const ProfPos *PP, unsigned uPos)\r
+       {\r
+       unsigned t = 0;\r
+\r
+       for (unsigned i = 0; i < K; ++i)\r
+               {\r
+               const unsigned uLetter = PP[uPos+i].m_uResidueGroup;\r
+               if (RESIDUE_GROUP_MULTIPLE == uLetter)\r
+                       return EMPTY;\r
+               t = t*4 + uLetter;\r
+               }\r
+\r
+       return t;\r
+       }\r
+\r
+void FindDiagsNuc(const ProfPos *PX, unsigned uLengthX, const ProfPos *PY,\r
+  unsigned uLengthY, DiagList &DL)\r
+       {\r
+       if (ALPHA_DNA != g_Alpha && ALPHA_RNA != g_Alpha)\r
+               Quit("FindDiagsNuc: requires nucleo alphabet");\r
+\r
+       DL.Clear();\r
+\r
+// 16 is arbitrary slop, no principled reason for this.\r
+       if (uLengthX < K + 16 || uLengthY < K + 16)\r
+               return;\r
+\r
+// Set A to shorter profile, B to longer\r
+       const ProfPos *PA;\r
+       const ProfPos *PB;\r
+       unsigned uLengthA;\r
+       unsigned uLengthB;\r
+       bool bSwap;\r
+       if (uLengthX < uLengthY)\r
+               {\r
+               bSwap = false;\r
+               PA = PX;\r
+               PB = PY;\r
+               uLengthA = uLengthX;\r
+               uLengthB = uLengthY;\r
+               }\r
+       else\r
+               {\r
+               bSwap = true;\r
+               PA = PY;\r
+               PB = PX;\r
+               uLengthA = uLengthY;\r
+               uLengthB = uLengthX;\r
+               }\r
+\r
+#if    TRACE\r
+       Log("FindDiagsNuc(LengthA=%d LengthB=%d\n", uLengthA, uLengthB);\r
+#endif\r
+\r
+// Build tuple map for the longer profile, B\r
+       if (uLengthB < K)\r
+               Quit("FindDiags: profile too short");\r
+\r
+       memset(TuplePos, EMPTY, sizeof(TuplePos));\r
+\r
+       for (unsigned uPos = 0; uPos < uLengthB - K; ++uPos)\r
+               {\r
+               const unsigned uTuple = GetTuple(PB, uPos);\r
+               if (EMPTY == uTuple)\r
+                       continue;\r
+               TuplePos[uTuple] = uPos;\r
+               }\r
+\r
+// Find matches\r
+       for (unsigned uPosA = 0; uPosA < uLengthA - K; ++uPosA)\r
+               {\r
+               const unsigned uTuple = GetTuple(PA, uPosA);\r
+               if (EMPTY == uTuple)\r
+                       continue;\r
+               const unsigned uPosB = TuplePos[uTuple];\r
+               if (EMPTY == uPosB)\r
+                       continue;\r
+\r
+       // This tuple is found in both profiles\r
+               unsigned uStartPosA = uPosA;\r
+               unsigned uStartPosB = uPosB;\r
+\r
+       // Try to extend the match forwards\r
+               unsigned uEndPosA = uPosA + K - 1;\r
+               unsigned uEndPosB = uPosB + K - 1;\r
+               for (;;)\r
+                       {\r
+                       if (uLengthA - 1 == uEndPosA || uLengthB - 1 == uEndPosB)\r
+                               break;\r
+                       const unsigned uAAGroupA = PA[uEndPosA+1].m_uResidueGroup;\r
+                       if (RESIDUE_GROUP_MULTIPLE == uAAGroupA)\r
+                               break;\r
+                       const unsigned uAAGroupB = PB[uEndPosB+1].m_uResidueGroup;\r
+                       if (RESIDUE_GROUP_MULTIPLE == uAAGroupB)\r
+                               break;\r
+                       if (uAAGroupA != uAAGroupB)\r
+                               break;\r
+                       ++uEndPosA;\r
+                       ++uEndPosB;\r
+                       }\r
+               uPosA = uEndPosA;\r
+\r
+#if    TRACE\r
+               {\r
+               Log("Match: A %4u-%4u   ", uStartPosA, uEndPosA);\r
+               for (unsigned n = uStartPosA; n <= uEndPosA; ++n)\r
+                       Log("%c", LetterToChar(PA[n].m_uResidueGroup));\r
+               Log("\n");\r
+               Log("       B %4u-%4u   ", uStartPosB, uEndPosB);\r
+               for (unsigned n = uStartPosB; n <= uEndPosB; ++n)\r
+                       Log("%c", LetterToChar(PB[n].m_uResidueGroup));\r
+               Log("\n");\r
+               }\r
+#endif\r
+\r
+               const unsigned uLength = uEndPosA - uStartPosA + 1;\r
+               assert(uEndPosB - uStartPosB + 1 == uLength);\r
+\r
+               if (uLength >= g_uMinDiagLength)\r
+                       {\r
+                       if (bSwap)\r
+                               DL.Add(uStartPosB, uStartPosA, uLength);\r
+                       else\r
+                               DL.Add(uStartPosA, uStartPosB, uLength);\r
+                       }\r
+               }\r
+       }\r