--- /dev/null
+/*****************************************************************
+ * SQUID - a library of functions for biological sequence analysis
+ * Copyright (C) 1992-2002 Washington University School of Medicine
+ *
+ * This source code is freely distributed under the terms of the
+ * GNU General Public License. See the files COPYRIGHT and LICENSE
+ * for details.
+ *****************************************************************/
+
+/* rk.c (originally from rnabob's patsearch.c)
+ *
+ * Contains a compiler and a search engine for Rabin-Karp
+ * based primary sequence pattern searching on encoded
+ * sequences.
+ *
+ * See Sedgewick, _Algorithms_, for a general discussion of
+ * the Rabin-Karp algorithm. See the rkcomp or rkexec man
+ * pages for specific details.
+ *
+ * RCS $Id: rk.c 217 2011-03-19 10:27:10Z andreas $ (Original squid RCS Id: rk.c,v 1.2 1998/10/09 18:07:16 eddy Exp)
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include "squid.h" /* seq encoding utilities and typedefs */
+#include "rk.h"
+
+
+#ifdef MEMDEBUG
+#include "dbmalloc.h"
+#endif
+
+Hashseq
+rkcomp(char *probe) /* A,C,G,T/U, N probe string, 0-8 nt long */
+{
+ Hashseq hashprobe = 0;
+ char coded[RK_HASHSIZE + 1];
+ int len;
+ int i;
+ /* check bounds violation on probe */
+ if ((len = strlen(probe)) > RK_HASHSIZE) return 0;
+ /* encode the probe */
+ if (seqencode(coded, probe) == 0) return 0;
+ /* pack the probe into a Hashseq */
+ for (i = 0; i < len; i++)
+ {
+ hashprobe <<= 4;
+ hashprobe |= (Hashseq) coded[i];
+ }
+ /* left adjust as needed */
+ for (; i < RK_HASHSIZE; i++)
+ {
+ hashprobe <<= 4;
+ hashprobe |= (Hashseq) NTN;
+ }
+ /* return the compiled probe */
+ return hashprobe;
+}
+
+int
+rkseq(Hashseq hashprobe, /* up to 8 nt packed into the probe */
+ char *sequence) /* encoded sequence */
+{
+ long i;
+ long pos = 0;
+ Hashseq target = 0;
+
+ /* initialize the target hashseq */
+ for (i = 0; i < RK_HASHSIZE; i++)
+ {
+ if (*(sequence + i) == NTEND)
+ break;
+ target <<= 4;
+ target |= (Hashseq) (*(sequence + i));
+ }
+
+ while (*(sequence + pos + RK_HASHSIZE -1) != NTEND)
+ {
+#ifdef DEBUG
+ printf("hashprobe: ");
+ writehash(hashprobe);
+ printf("\ttarget: ");
+ writehash(target);
+ printf("\nhashprobe & target: ");
+ writehash(hashprobe & target);
+ printf("\n");
+#endif
+ if ((hashprobe & target) == target)
+ return ((int) pos);
+ target <<= 4;
+ target |= (Hashseq) (*(sequence + pos + RK_HASHSIZE));
+ pos++;
+ }
+ /* now we deal with an end effect */
+ for (i = 0; i < RK_HASHSIZE; i++)
+ {
+ target |= (Hashseq) NTN;
+ if ((hashprobe & target) == target)
+ return ((int) pos);
+ target <<=4;
+ pos++;
+ }
+
+ return(-1);
+}
+
+
+#ifdef DEBUG /* Debugging aids */
+
+static void
+writehash(Hashseq hashseq)
+{
+ int idx;
+ int sym;
+
+ if (hashseq/16)
+ writehash(hashseq/16);
+
+ sym = (int) (hashseq % 16);
+ if (sym == 0)
+ putchar('-');
+ else
+ {
+ for (idx = 0; sym != iupac[idx].code && idx < IUPACSYMNUM; idx++);
+ if (idx > IUPACSYMNUM)
+ printf("(%d)", sym);
+ else
+ putchar(iupac[idx].sym);
+ }
+}
+
+#endif