1 /*****************************************************************
2 * HMMER - Biological sequence analysis with profile HMMs
3 * Copyright (C) 1992-1999 Washington University School of Medicine
6 * This source code is distributed under the terms of the
7 * GNU General Public License. See the files COPYING and LICENSE
9 *****************************************************************/
11 /* rk.c (originally from rnabob's patsearch.c)
13 * Contains a compiler and a search engine for Rabin-Karp
14 * based primary sequence pattern searching on encoded
17 * See Sedgewick, _Algorithms_, for a general discussion of
18 * the Rabin-Karp algorithm. See the rkcomp or rkexec man
19 * pages for specific details.
21 * RCS $Id: rk.c,v 1.1.1.1 2005/03/22 08:34:16 cmzmasek Exp $
27 #include "squid.h" /* seq encoding utilities and typedefs */
36 rkcomp(char *probe) /* A,C,G,T/U, N probe string, 0-8 nt long */
38 Hashseq hashprobe = 0;
39 char coded[RK_HASHSIZE + 1];
42 /* check bounds violation on probe */
43 if ((len = strlen(probe)) > RK_HASHSIZE) return 0;
44 /* encode the probe */
45 if (seqencode(coded, probe) == 0) return 0;
46 /* pack the probe into a Hashseq */
47 for (i = 0; i < len; i++)
50 hashprobe |= (Hashseq) coded[i];
52 /* left adjust as needed */
53 for (; i < RK_HASHSIZE; i++)
56 hashprobe |= (Hashseq) NTN;
58 /* return the compiled probe */
63 rkseq(Hashseq hashprobe, /* up to 8 nt packed into the probe */
64 char *sequence) /* encoded sequence */
70 /* initialize the target hashseq */
71 for (i = 0; i < RK_HASHSIZE; i++)
73 if (*(sequence + i) == NTEND)
76 target |= (Hashseq) (*(sequence + i));
79 while (*(sequence + pos + RK_HASHSIZE -1) != NTEND)
82 printf("hashprobe: ");
86 printf("\nhashprobe & target: ");
87 writehash(hashprobe & target);
90 if ((hashprobe & target) == target)
93 target |= (Hashseq) (*(sequence + pos + RK_HASHSIZE));
96 /* now we deal with an end effect */
97 for (i = 0; i < RK_HASHSIZE; i++)
99 target |= (Hashseq) NTN;
100 if ((hashprobe & target) == target)
110 #ifdef DEBUG /* Debugging aids */
113 writehash(Hashseq hashseq)
119 writehash(hashseq/16);
121 sym = (int) (hashseq % 16);
126 for (idx = 0; sym != iupac[idx].code && idx < IUPACSYMNUM; idx++);
127 if (idx > IUPACSYMNUM)
130 putchar(iupac[idx].sym);