--- /dev/null
+/*****************************************************************
+ * SQUID - a library of functions for biological sequence analysis
+ * Copyright (C) 1992-2002 Washington University School of Medicine
+ *
+ * This source code is freely distributed under the terms of the
+ * GNU General Public License. See the files COPYRIGHT and LICENSE
+ * for details.
+ *****************************************************************/
+
+/* seqencode.c
+ *
+ * Routines for creating and manipulating encoded sequence strings.
+ * RCS $Id: seqencode.c 217 2011-03-19 10:27:10Z andreas $ (Original squid RCS Id: seqencode.c,v 1.3 1999/05/02 21:55:27 eddy Exp)
+ */
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include "squid.h"
+
+
+#ifdef MEMDEBUG
+#include "dbmalloc.h"
+#endif
+/* seqcmp()
+ *
+ * Count the mismatches between two NTEND-terminated encoded sequences.
+ *
+ * s1, s2 - encoded sequences to compare
+ * allow  - mismatch budget; counting stops as soon as the tally exceeds it
+ *
+ * Positions present in both sequences are judged with ntmatch(). Positions
+ * of s1 that lie beyond the end of s2 each count as one mismatch; positions
+ * of s2 that lie beyond the end of s1 are not counted.
+ *
+ * Returns the number of mismatches counted, 0 when there were none.
+ */
+int
+seqcmp(char *s1, char *s2, int allow)
+{
+  int nmis;
+
+  /* Stretch covered by both sequences. */
+  for (nmis = 0; (*s1 != NTEND) && (*s2 != NTEND) && (nmis <= allow); s1++, s2++)
+    {
+      if (ntmatch(*s1, *s2))
+        continue;
+      nmis++;
+    }
+
+  /* Whatever is left of s1 has no partner in s2. */
+  for (; (*s1 != NTEND) && (nmis <= allow); s1++)
+    nmis++;
+
+  return nmis;
+}
+/* seqncmp()
+ *
+ * Same idea as seqcmp(), but examines at most n positions.
+ *
+ * s1, s2 - NTEND-terminated encoded sequences to compare
+ * n      - maximum number of positions to look at; a negative value is
+ *          never counted down to zero and therefore imposes no limit
+ * allow  - mismatch budget; the function returns as soon as the tally
+ *          exceeds it
+ *
+ * Returns the number of mismatches counted, 0 when there were none.
+ */
+int
+seqncmp(char *s1, char *s2, int n, int allow)
+{
+  int mmat = 0;
+
+  /* n is decremented only after a position has actually been examined.
+   * Testing "n-- != 0" in the loop condition left n at -1 once the window
+   * was used up, so the tail loop below then ran without any limit and
+   * charged mismatches for s1 positions beyond the first n. */
+  while ((n != 0) && (*s2 != NTEND))
+    {
+      if ((!(ntmatch(*s1, *s2))) && (++mmat > allow))
+        return(mmat);
+      /* Once s1 has reached its terminator, stay on it: s2 may be longer,
+       * and stepping further would read past the end of s1. */
+      if (*s1 != NTEND)
+        s1++;
+      s2++;
+      n--;
+    }
+
+  /* s2 is exhausted; each remaining s1 position still inside the window
+   * counts as one mismatch. */
+  while ((n != 0) && (*s1 != NTEND) && (mmat <= allow))
+    {
+      mmat++;
+      s1++;
+      n--;
+    }
+  return(mmat);
+}
+
+/* seqencode()
+ *
+ * Convert a character sequence into an encoded sequence string.
+ *
+ * codeseq - pre-allocated space for the answer; needs room for one code
+ *           per input character plus the NTEND terminator
+ * str     - '\0'-terminated text to convert; lower-case letters are
+ *           upper-cased IN PLACE, so str must be writable
+ *
+ * Each character is looked up in the iupac[] table by its symbol.
+ * Assumes iupac[] holds exactly IUPACSYMNUM entries (indices
+ * 0 .. IUPACSYMNUM-1), which is the bound the decoding routines in this
+ * file use as well.
+ *
+ * Returns 1 for success. Returns 0 if a character is not in the table; in
+ * that case codeseq is terminated with NTEND at the offending position.
+ */
+int
+seqencode(char *codeseq,	/* pre-allocated space for answer */
+          char *str)		/* character string to convert */
+{
+  char *ptr;
+  int   idx;
+
+  ptr = codeseq;
+  while (*str != '\0')
+    {
+      /* <ctype.h> functions require a value representable as unsigned char;
+       * a plain char may be negative. */
+      if (islower((unsigned char) *str))
+        *str = (char) toupper((unsigned char) *str);
+
+      /* Check the bound before touching the table so the search never reads
+       * beyond the last entry. */
+      for (idx = 0; idx < IUPACSYMNUM && *str != iupac[idx].sym; idx++)
+        ;
+      if (idx >= IUPACSYMNUM)
+        {
+          *ptr = (char) NTEND;
+          return 0;
+        }
+      *ptr = iupac[idx].code;
+      ptr++;
+      str++;
+    }
+  *ptr = NTEND;
+  return 1;
+}
+
+
+/* coded_revcomp()
+ *
+ * Write the reverse complement of an encoded sequence.
+ *
+ * comp - pre-allocated space for the answer; needs room for as many codes
+ *        as seq holds plus the NTEND terminator, and must not overlap seq
+ *        (seq is read from its end while comp is written from its start)
+ * seq  - NTEND-terminated encoded sequence
+ *
+ * Each code is looked up in the iupac[] table and replaced by that entry's
+ * complement code. Assumes iupac[] holds exactly IUPACSYMNUM entries.
+ *
+ * Returns 1 for success. Returns 0 if a code is not in the table; in that
+ * case comp is terminated with NTEND at the position that failed.
+ */
+int
+coded_revcomp(char *comp, char *seq)
+{
+  long bases;
+  long pos;
+  int  idx;
+  char code;
+
+  /* Measure the sequence by its NTEND terminator, the same marker this
+   * function writes at the end of comp. */
+  for (bases = 0; seq[bases] != NTEND; bases++)
+    ;
+
+  for (pos = 0; pos < bases; pos++)
+    {
+      /* Index from the back; no pointer is ever formed before seq[0], even
+       * for an empty sequence. */
+      code = seq[bases - 1 - pos];
+
+      /* Bound first, then compare. The failure test must be ">=": with the
+       * loop stopping at IUPACSYMNUM, a "> IUPACSYMNUM" test can never
+       * fire and an unknown code would index one past the table. */
+      for (idx = 0; idx < IUPACSYMNUM && code != iupac[idx].code; idx++)
+        ;
+      if (idx >= IUPACSYMNUM)
+        {
+          comp[pos] = NTEND;
+          return 0;
+        }
+      comp[pos] = iupac[idx].comp;
+    }
+  comp[bases] = NTEND;
+  return(1);
+}
+
+/* seqdecode()
+ *
+ * Convert an encoded sequence back into a character string.
+ *
+ * str     - pre-allocated string to write into; needs room for one
+ *           character per code plus the terminating '\0'
+ * codeseq - NTEND-terminated encoded sequence
+ *
+ * Each code is looked up in the iupac[] table and replaced by that entry's
+ * symbol. Assumes iupac[] holds exactly IUPACSYMNUM entries.
+ *
+ * Returns 1 for success. Returns 0 if a code is not in the table; str then
+ * holds what was decoded so far followed by an 'X' at the failing position
+ * and is '\0'-terminated.
+ */
+int
+seqdecode(char *str, char *codeseq)
+{
+  int idx;
+  int pos;
+
+  for (pos = 0; codeseq[pos] != NTEND; pos++)
+    {
+      /* Bound first, then compare. The failure test must be ">=": with the
+       * loop stopping at IUPACSYMNUM, a "> IUPACSYMNUM" test can never
+       * fire and an unknown code would index one past the table. */
+      for (idx = 0; idx < IUPACSYMNUM && codeseq[pos] != iupac[idx].code; idx++)
+        ;
+      if (idx >= IUPACSYMNUM)
+        {
+          str[pos] = 'X';
+          /* pos is a real sequence position, so pos + 1 is still inside a
+           * buffer sized for the full decode plus its terminator. */
+          str[pos + 1] = '\0';
+          return 0;
+        }
+      str[pos] = iupac[idx].sym;
+    }
+  str[pos] = '\0';
+  return 1;
+}
+
+/* seqndecode()
+ *
+ * Convert the first n codes of an encoded sequence into a character string.
+ *
+ * str     - pre-allocated string to write into; needs room for n
+ *           characters plus the terminating '\0'
+ * codeseq - encoded sequence to decode; exactly n codes are read, the
+ *           loop itself does not test for NTEND
+ * n       - how many bases to decode; for n <= 0 str becomes empty
+ *
+ * Each code is looked up in the iupac[] table and replaced by that entry's
+ * symbol. Assumes iupac[] holds exactly IUPACSYMNUM entries.
+ *
+ * Returns 1 for success. Returns 0 if a code is not in the table; str then
+ * holds what was decoded so far followed by an 'X' at the failing position
+ * and is '\0'-terminated.
+ */
+int
+seqndecode(
+           char *str,		/* pre-allocated string to write into */
+           char *codeseq,	/* sequence to decode */
+           int   n)		/* how many bases to decode */
+{
+  int idx;
+  int pos;
+
+  for (pos = 0; pos < n; pos++)
+    {
+      /* Bound first, then compare. The failure test must be ">=": with the
+       * loop stopping at IUPACSYMNUM, a "> IUPACSYMNUM" test can never
+       * fire and an unknown code would index one past the table. */
+      for (idx = 0; idx < IUPACSYMNUM && codeseq[pos] != iupac[idx].code; idx++)
+        ;
+      if (idx >= IUPACSYMNUM)
+        {
+          str[pos] = 'X';
+          /* pos < n here, so pos + 1 still fits a buffer of n + 1 bytes. */
+          str[pos + 1] = '\0';
+          return 0;
+        }
+      str[pos] = iupac[idx].sym;
+    }
+  str[pos] = '\0';
+  return 1;
+}
+