/***************************************************************** * SQUID - a library of functions for biological sequence analysis * Copyright (C) 1992-2002 Washington University School of Medicine * * This source code is freely distributed under the terms of the * GNU General Public License. See the files COPYRIGHT and LICENSE * for details. *****************************************************************/ /* seqencode.c * * Routines for creating and manipulating encoded sequence strings. * RCS $Id: seqencode.c 217 2011-03-19 10:27:10Z andreas $ (Original squid RCS Id: seqencode.c,v 1.3 1999/05/02 21:55:27 eddy Exp) */ #include #include #include #include "squid.h" #ifdef MEMDEBUG #include "dbmalloc.h" #endif /* seqcmp() returns 0 if s1 == s2 mismatch number otherwise */ int seqcmp(char *s1, char *s2, int allow) { int mmat = 0; while ((*s1 != NTEND) && (*s2 != NTEND) && (mmat <= allow)) { if (!(ntmatch(*s1, *s2))) mmat++;; s1++; s2++; } while ((*s1++ != NTEND) && (mmat <= allow)) mmat++; return(mmat); } /* seqncmp() same as seqcmp but it looks at, at most, n positions */ int seqncmp(char *s1, char *s2, int n, int allow) { int mmat = 0; while ((*s2 != NTEND) && (n-- != 0)) { if ((!(ntmatch(*s1, *s2))) && (++mmat > allow)) return(mmat); s1++; s2++; } while ((n-- != 0) && (*s1++ != NTEND) && (mmat <= allow)) mmat++; return (mmat); } /* seqencode() given a character text string str (A,C,G,T), convert to an encoded seq string; return 1 for success, 0 if fail */ int seqencode(char *codeseq, /* pre-allocated space for answer */ char *str) /* character string to convert */ { char *ptr; int idx; ptr = codeseq; while (*str != '\0') { if (islower((int) (*str))) *str = (char) toupper((int) (*str)); for (idx = 0; *str != iupac[idx].sym && idx <= IUPACSYMNUM; idx++) ; if (idx > IUPACSYMNUM) { *ptr = (char) NTEND; return 0; } else *ptr = iupac[idx].code; ptr++; str++; } *ptr = NTEND; return 1; } int coded_revcomp(char *comp, char *seq) { long bases; char *bckp, *fwdp; int idx; long pos; bases = strlen(seq); fwdp = comp; bckp = seq + bases -1; for (pos = 0; pos < bases; pos++) { for (idx = 0; *bckp != iupac[idx].code && idx < IUPACSYMNUM; idx++); if (idx > IUPACSYMNUM) { *fwdp = NTEND; return 0; } else *fwdp = iupac[idx].comp; fwdp++; bckp--; } *fwdp = NTEND; return(1); } int seqdecode(char *str, char *codeseq) { int idx; int pos; pos = 0; while (*codeseq != NTEND) { for (idx = 0; *codeseq != iupac[idx].code && idx < IUPACSYMNUM; idx++) ; if (idx > IUPACSYMNUM) { str[pos] = 'X'; return 0; } else str[pos] = iupac[idx].sym; codeseq++; pos++; } str[pos] = '\0'; return 1; } int seqndecode( char *str, /* pre-allocated string to write into */ char *codeseq, /* sequence to decode */ int n) /* how many bases to decode */ { int idx; int pos = 0; while (--n >= 0) { for (idx = 0; *codeseq != iupac[idx].code && idx < IUPACSYMNUM; idx++); if (idx > IUPACSYMNUM) { str[pos] = 'X'; return 0; } else str[pos] = iupac[idx].sym; codeseq++; pos++; } str[pos] = '\0'; return 1; }