1 /*****************************************************************
2 * SQUID - a library of functions for biological sequence analysis
3 * Copyright (C) 1992-2002 Washington University School of Medicine
5 * This source code is freely distributed under the terms of the
6 * GNU General Public License. See the files COPYRIGHT and LICENSE
8 *****************************************************************/
10 /* rk.c (originally from rnabob's patsearch.c)
12 * Contains a compiler and a search engine for Rabin-Karp
13 * based primary sequence pattern searching on encoded
16 * See Sedgewick, _Algorithms_, for a general discussion of
17 * the Rabin-Karp algorithm. See the rkcomp or rkexec man
18 * pages for specific details.
20 * RCS $Id: rk.c 217 2011-03-19 10:27:10Z andreas $ (Original squid RCS Id: rk.c,v 1.2 1998/10/09 18:07:16 eddy Exp)
26 #include "squid.h" /* seq encoding utilities and typedefs */
35 rkcomp(char *probe) /* A,C,G,T/U, N probe string, 0-8 nt long */
37 Hashseq hashprobe = 0;
38 char coded[RK_HASHSIZE + 1];
41 /* check bounds violation on probe */
42 if ((len = strlen(probe)) > RK_HASHSIZE) return 0;
43 /* encode the probe */
44 if (seqencode(coded, probe) == 0) return 0;
45 /* pack the probe into a Hashseq */
46 for (i = 0; i < len; i++)
49 hashprobe |= (Hashseq) coded[i];
51 /* left adjust as needed */
52 for (; i < RK_HASHSIZE; i++)
55 hashprobe |= (Hashseq) NTN;
57 /* return the compiled probe */
62 rkseq(Hashseq hashprobe, /* up to 8 nt packed into the probe */
63 char *sequence) /* encoded sequence */
69 /* initialize the target hashseq */
70 for (i = 0; i < RK_HASHSIZE; i++)
72 if (*(sequence + i) == NTEND)
75 target |= (Hashseq) (*(sequence + i));
78 while (*(sequence + pos + RK_HASHSIZE -1) != NTEND)
81 printf("hashprobe: ");
85 printf("\nhashprobe & target: ");
86 writehash(hashprobe & target);
89 if ((hashprobe & target) == target)
92 target |= (Hashseq) (*(sequence + pos + RK_HASHSIZE));
95 /* now we deal with an end effect */
96 for (i = 0; i < RK_HASHSIZE; i++)
98 target |= (Hashseq) NTN;
99 if ((hashprobe & target) == target)
109 #ifdef DEBUG /* Debugging aids */
112 writehash(Hashseq hashseq)
118 writehash(hashseq/16);
120 sym = (int) (hashseq % 16);
125 for (idx = 0; sym != iupac[idx].code && idx < IUPACSYMNUM; idx++);
126 if (idx > IUPACSYMNUM)
129 putchar(iupac[idx].sym);