+++ /dev/null
-/*****************************************************************
- * SQUID - a library of functions for biological sequence analysis
- * Copyright (C) 1992-2002 Washington University School of Medicine
- *
- * This source code is freely distributed under the terms of the
- * GNU General Public License. See the files COPYRIGHT and LICENSE
- * for details.
- *****************************************************************/
-
-/* sre_string.c
- *
- * my library of extra string functions. Some for portability
- * across UNIXes
- *
- * RCS $Id: sre_string.c 217 2011-03-19 10:27:10Z andreas $ (Original squid RCS Id: sre_string.c,v 1.11 2001/06/07 16:59:37 eddy Exp)
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <stdarg.h>
-#include <ctype.h>
-#include "squid.h"
-
/* Function: Strdup()
 *
 * Purpose:  Portable stand-in for the widespread (but non-ANSI)
 *           strdup(). A NULL argument is tolerated and simply
 *           yields NULL.
 *
 * Args:     s - '\0'-terminated string to copy, or NULL
 *
 * Returns:  malloc()'ed copy of s, which the caller must free();
 *           NULL if s is NULL or if malloc() fails.
 */
char *
Strdup(char *s)
{
  char   *copy;
  size_t  nbytes;

  if (s == NULL) return NULL;

  nbytes = strlen(s) + 1;       /* +1: room for the terminator */
  copy   = (char *) malloc (nbytes);
  if (copy == NULL) return NULL;

  memcpy(copy, s, nbytes);      /* the terminator is copied too */
  return copy;
}
-
/* Function: StringChop()
 * Date:     SRE, Wed Oct 29 12:10:02 1997 [TWA 721]
 *
 * Purpose:  Chop trailing whitespace off of a string, in place.
 *
 * Args:     s - '\0'-terminated string to trim; NULL is ignored.
 *
 * Returns:  (void)
 *           s is shortened by writing a new terminator just after
 *           its last non-whitespace character. A string made up
 *           entirely of whitespace becomes the empty string.
 */
void
StringChop(char *s)
{
  size_t len;

  if (s == NULL) return;

  len = strlen(s);
  /* <ctype.h> functions are only defined for unsigned char values
   * (and EOF), so a plain char must not be passed directly: it can
   * be negative for bytes >= 0x80.
   */
  while (len > 0 && isspace((unsigned char) s[len-1])) len--;
  s[len] = '\0';
}
-
/* Function: Strinsert()
 *
 * Purpose:  Insert character c into string s1 at position pos.
 *           The characters from pos onward (terminator included)
 *           move one place to the right, so the caller must have
 *           room for one more char in s1.
 *
 *           Inserting '\0' simply truncates s1 at pos; nothing is
 *           shifted in that case.
 *
 * Args:     s1  - string to insert a char into
 *           c   - char to insert
 *           pos - position in s1 to insert c at, 0..strlen(s1)
 *                 (pos == strlen(s1) appends)
 *
 * Returns:  1 on success.
 *           0 if s1 is NULL or pos lies outside 0..strlen(s1);
 *           s1 is not modified in that case.
 */
int
Strinsert(char  *s1,
          char   c,
          int    pos)
{
  size_t len;
  size_t at;

  if (s1 == NULL || pos < 0) return 0;

  len = strlen(s1);
  at  = (size_t) pos;
  if (at > len) return 0;       /* would write past the terminator */

  if (c == '\0')
    {
      s1[at] = '\0';
      return 1;
    }

  /* Source and destination overlap, hence memmove(); the +1 carries
   * the terminator along with the tail.
   */
  memmove(s1 + at + 1, s1 + at, len - at + 1);
  s1[at] = c;
  return 1;
}
-
-
/* Function: Strdelete()
 *
 * Purpose:  Delete the character at position pos from string s1,
 *           closing the gap by moving the rest of the string
 *           (terminator included) one place to the left.
 *
 * Args:     s1  - string to delete a char from
 *           pos - position of char to delete, 0..n-1
 *
 * Returns:  1 on success. pos == strlen(s1) addresses the
 *           terminator; nothing is deleted, and 1 is returned.
 *           0 if s1 is NULL or pos lies outside 0..strlen(s1);
 *           s1 is not modified in that case.
 */
int
Strdelete(char *s1,
          int   pos)
{
  size_t len;
  size_t at;

  if (s1 == NULL || pos < 0) return 0;

  len = strlen(s1);
  at  = (size_t) pos;
  if (at >  len) return 0;      /* beyond the terminator */
  if (at == len) return 1;      /* on the terminator: nothing to delete */

  /* len - at bytes: the characters after pos, plus the terminator */
  memmove(s1 + at, s1 + at + 1, len - at);
  return 1;
}
-
/* Function: s2lower()
 *
 * Purpose:  Convert a string to lower case, in place, by passing
 *           each character through sre_tolower().
 *
 * Args:     s - '\0'-terminated string to convert
 *
 * Returns:  (void)
 */
void
s2lower(char *s)
{
  /* Go through unsigned char so that bytes >= 0x80 are never handed
   * over as negative values where plain char is signed.
   * NOTE(review): sre_tolower() is defined elsewhere; presumably it
   * follows the <ctype.h> convention for its argument -- confirm.
   */
  for (; *s != '\0'; s++)
    *s = (char) sre_tolower((unsigned char) *s);
}
-
/* Function: s2upper()
 *
 * Purpose:  Convert a string to upper case, in place, by passing
 *           each character through sre_toupper().
 *
 * Args:     s - '\0'-terminated string to convert
 *
 * Returns:  (void)
 */
void
s2upper(char *s)
{
  /* Go through unsigned char so that bytes >= 0x80 are never handed
   * over as negative values where plain char is signed.
   * NOTE(review): sre_toupper() is defined elsewhere; presumably it
   * follows the <ctype.h> convention for its argument -- confirm.
   */
  for (; *s != '\0'; s++)
    *s = (char) sre_toupper((unsigned char) *s);
}
-
-
/* Function: sre_malloc()
 *
 * Purpose:  malloc() wrapper that reports the requesting source
 *           location when the allocation fails.
 *
 * Args:     file - name of the source file making the request
 *           line - line number of the request in that file
 *           size - number of bytes to allocate
 *
 * Returns:  ptr to the allocated memory.
 *           If malloc() returns NULL, Die() is called with a
 *           message naming size, file and line.
 *           NOTE(review): Die() is defined elsewhere and is
 *           presumably fatal; if it can return, NULL is passed
 *           back to the caller.
 *           NOTE(review): malloc(0) is allowed to return NULL,
 *           which is reported here as a failure.
 */
void *
sre_malloc(char *file, int line, size_t size)
{
  void *ptr;

  /* size_t has no portable pre-C99 conversion specifier, so the
   * value is cast to unsigned long and printed with %lu.
   */
  SQD_DPRINTF3(("MALLOC: %lu bytes (file %s line %d)\n", (unsigned long) size, file, line));
  if ((ptr = malloc (size)) == NULL)
    Die("malloc of %lu bytes failed: file %s line %d", (unsigned long) size, file, line);
  return ptr;
}
-
/* Function: sre_realloc()
 *
 * Purpose:  realloc() wrapper that reports the requesting source
 *           location when the reallocation fails.
 *
 * Args:     file - name of the source file making the request
 *           line - line number of the request in that file
 *           p    - block to resize (passed straight to realloc())
 *           size - new size in bytes
 *
 * Returns:  ptr to the resized block.
 *           If realloc() returns NULL, Die() is called with a
 *           message naming size, file and line.
 *           NOTE(review): Die() is defined elsewhere and is
 *           presumably fatal; if it can return, NULL is passed
 *           back to the caller.
 *           NOTE(review): realloc(p, 0) is allowed to return NULL,
 *           which is reported here as a failure.
 */
void *
sre_realloc(char *file, int line, void *p, size_t size)
{
  void *ptr;

  /* size_t has no portable pre-C99 conversion specifier, so the
   * value is cast to unsigned long and printed with %lu.
   */
  if ((ptr = realloc(p, size)) == NULL)
    Die("realloc of %lu bytes failed: file %s line %d", (unsigned long) size, file, line);
  return ptr;
}
-
-
-
/* Function: Free2DArray(), Free3DArray()
 * Date:     SRE, Tue Jun  1 14:47:14 1999 [St. Louis]
 *
 * Purpose:  Convenience routines that release 2D and 3D pointer
 *           arrays, innermost level first. Any pointer, at any
 *           level, may be NULL, so "sparse" arrays are accepted.
 *
 * Args:     p     - array to be freed
 *           dim1  - n for first dimension
 *           dim2  - n for second dimension (Free3DArray() only)
 *
 *           e.g. a 2D array is indexed p[0..dim1-1][]
 *                a 3D array is indexed p[0..dim1-1][0..dim2-1][]
 *
 * Returns:  void
 *
 * Diagnostics: (void)
 *              "never fails"
 */
void
Free2DArray(void **p, int dim1)
{
  int row;

  if (p == NULL) return;

  for (row = 0; row < dim1; row++)
    free(p[row]);               /* free(NULL) does nothing */
  free(p);
}
/* Free3DArray(): release a 3D pointer array p[0..dim1-1][0..dim2-1][].
 * p itself, any p[i], and any p[i][j] may be NULL.
 */
void
Free3DArray(void ***p, int dim1, int dim2)
{
  int i, j;

  if (p == NULL) return;

  for (i = 0; i < dim1; i++)
    {
      void **plane = p[i];

      if (plane == NULL) continue;      /* sparse: nothing below p[i] */
      for (j = 0; j < dim2; j++)
        free(plane[j]);                 /* free(NULL) does nothing */
      free(plane);
    }
  free(p);
}
-
-
/* Function: RandomSequence()
 *
 * Purpose:  Build a '\0'-terminated sequence of len symbols. Each
 *           symbol is alphabet[FChoose(p,n)], drawn independently
 *           for every position.
 *
 * Args:     alphabet - e.g. "ACGT"
 *           p        - probability distribution [0..n-1]
 *           n        - number of symbols in alphabet
 *           len      - length of generated sequence
 *
 * Return:   ptr to the new sequence; the caller frees it.
 *           NOTE(review): the buffer comes from MallocOrDie(),
 *           which by its name does not return on allocation
 *           failure, so a NULL return looks unreachable -- confirm
 *           against its definition.
 */
char *
RandomSequence(char *alphabet, float *p, int n, int len)
{
  char *seq;
  int   pos;

  seq = (char *) MallocOrDie (sizeof(char) * (len+1));

  pos = 0;
  while (pos < len)
    {
      seq[pos] = alphabet[FChoose(p,n)];
      pos++;
    }
  seq[pos] = '\0';
  return seq;
}
-
/* Function: sre_fgets()
 * Date:     SRE, Thu May 13 10:56:28 1999 [St. Louis]
 *
 * Purpose:  Dynamic allocation version of fgets(),
 *           capable of reading unlimited line lengths.
 *
 * Args:     buf - ptr to a string (may be reallocated)
 *           n   - ptr to current allocated length of buf,
 *                 (may be changed)
 *           fp  - open file ptr for reading
 *
 *           Before the first call to sre_fgets(),
 *           buf should be initialized to NULL and n to 0.
 *           They're a linked pair, so don't muck with the
 *           allocation of buf or the value of n while
 *           you're still doing sre_fgets() calls with them.
 *
 * Returns:  ptr to the buffer on success; the line keeps its
 *           trailing '\n' if it had one.
 *           NULL on EOF or read error (buf isn't to be used in
 *           this case).
 *           sre_fgets() *always* results in an allocation
 *           in buf, even when it returns NULL; the caller
 *           frees it when done.
 *
 *           The reason to have it return a ptr to buf
 *           is that it makes wrapper macros easy; see
 *           MSAFileGetLine() for an example.
 *
 * Example:  char *buf;
 *           int   n;
 *           FILE *fp;
 *
 *           fp  = fopen("my_file", "r");
 *           buf = NULL;
 *           n   = 0;
 *           while (sre_fgets(&buf, &n, fp) != NULL)
 *           {
 *             do stuff with buf;
 *           }
 */
char *
sre_fgets(char **buf, int *n, FILE *fp)
{
  char *s;
  int   len;
  int   pos;

  if (*n == 0)
    {
      *buf = MallocOrDie(sizeof(char) * 128);
      *n   = 128;
    }

  /* Simple case 1. We're sitting at EOF, or there's an error.
   *                fgets() returns NULL, so we return NULL.
   */
  if (fgets(*buf, *n, fp) == NULL) return NULL;

  /* Simple case 2. fgets() got a string, and it reached EOF.
   *                return success status, so caller can use
   *                the last line; on the next call we'll
   *                return the NULL for the EOF.
   */
  if (feof(fp)) return *buf;

  /* Simple case 3. We got a complete string, with \n,
   *                and don't need to extend the buffer.
   *                len is 0 if the data read starts with a '\0'
   *                byte; the len > 0 test keeps us from looking
   *                at (*buf)[-1] then.
   */
  len = strlen(*buf);
  if (len > 0 && (*buf)[len-1] == '\n') return *buf;

  /* The case we're waiting for. We have an incomplete string,
   * and we have to extend the buffer one or more times. Make
   * sure we overwrite the previous fgets's \0 (hence +(n-1)
   * in first step, rather than 128, and reads of 129, not 128).
   */
  pos = (*n)-1;
  while (1) {
    *n  += 128;
    *buf = ReallocOrDie(*buf, sizeof(char) * (*n));
    s = *buf + pos;
    /* EOF or error mid-line: hand back what was read so far */
    if (fgets(s, 129, fp) == NULL) return *buf;
    len = strlen(s);
    if (len > 0 && s[len-1] == '\n') return *buf;
    pos += 128;
  }
  /*NOTREACHED*/
}
-
/* Function: sre_strcat()
 * Date:     SRE, Thu May 13 09:36:32 1999 [St. Louis]
 *
 * Purpose:  Dynamic memory version of strcat().
 *           appends src to the string that dest points to,
 *           extending allocation for dest if necessary.
 *
 *           One timing experiment (100 successive appends of
 *           1-255 char) shows sre_strcat() has about a 20%
 *           overhead relative to strcat(). However, if optional
 *           length info is passed, sre_strcat() is about 30%
 *           faster than strcat().
 *
 * Args:     dest  - ptr to string (char **), '\0' terminated
 *           ldest - length of dest, if known; or -1 if length unknown.
 *           src   - string to append to dest, '\0' terminated
 *           lsrc  - length of src, if known; or -1 if length unknown.
 *
 *           *dest may be NULL, in which case this is
 *           the equivalent of *dest = Strdup(src); ldest is
 *           ignored and the old length is taken to be 0.
 *
 *           src may also be NULL, in which case
 *           dest is unmodified and lsrc is ignored.
 *
 *           if both *dest and src are NULL, dest is
 *           unmodified; it stays NULL.
 *
 *           the length parameters are optional. If a -1
 *           is passed, sre_strcat() will call strlen() to
 *           determine the length itself. Passing length
 *           info saves the strlen() calls and can speed things
 *           up if lots of successive appends need to be done.
 *           lsrc must not exceed strlen(src). If it is smaller,
 *           only the first lsrc characters of src are appended.
 *
 * Returns:  new length of dest (>=0 on success);
 *           dest is (probably) reallocated, and modified
 *           to a longer string, '\0' terminated.
 */
int
sre_strcat(char **dest, int ldest, char *src, int lsrc)
{
  int   len1, len2;

  /* A NULL string has length 0 whatever length the caller claims;
   * trusting a positive ldest/lsrc here would write past the new
   * allocation, or read through a NULL src.
   */
  if      (*dest == NULL) len1 = 0;
  else if (ldest < 0)     len1 = strlen(*dest);
  else                    len1 = ldest;

  if      (src == NULL)   len2 = 0;
  else if (lsrc < 0)      len2 = strlen(src);
  else                    len2 = lsrc;

  if (len2 == 0) return len1;

  if (*dest == NULL) *dest = MallocOrDie(sizeof(char) * (len2+1));
  else               *dest = ReallocOrDie(*dest, sizeof(char) * (len1+len2+1));

  /* Write the terminator ourselves rather than copying src[len2],
   * which is only a '\0' when lsrc is exactly strlen(src).
   */
  memcpy((*dest)+len1, src, len2);
  (*dest)[len1+len2] = '\0';
  return len1+len2;
}
-
/* Function: sre_strtok()
 * Date:     SRE, Wed May 19 16:30:20 1999 [St. Louis]
 *
 * Purpose:  A strtok() replacement that keeps its position in a
 *           caller-supplied pointer instead of internal static
 *           state.
 *
 *           Starting from *s, skips over any characters found in
 *           delim; the first character not in delim begins the
 *           token. The token runs up to the next delim character
 *           (or the terminal '\0'). A delim character that ends
 *           the token is overwritten with '\0', and *s is set to
 *           the character right after it, so successive calls
 *           extract successive tokens. If the token ran to the end
 *           of the string, *s is set to point at the terminal '\0'.
 *
 *           If no token is found, returns NULL and leaves *s
 *           where it was.
 *
 *           Also returns the length of the token, which
 *           may save us a strlen() call in some applications.
 *
 * Limitations:
 *           *s can't be a constant string, since we write to it.
 *
 * Example:
 *           char *tok;
 *           int   len;
 *           char *s;
 *           char  buf[50] = "This is  a sentence.";
 *
 *           s = buf;
 *           tok = sre_strtok(&s, " ", &len);
 *                tok is "This"; s is "is  a sentence."; len is 4.
 *           tok = sre_strtok(&s, " ", &len);
 *                tok is "is"; s is " a sentence."; len is 2.
 *           tok = sre_strtok(&s, " ", &len);
 *                tok is "a"; s is "sentence."; len is 1.
 *           tok = sre_strtok(&s, " ", &len);
 *                tok is "sentence."; s is "\0"; len is 9.
 *           tok = sre_strtok(&s, " ", &len);
 *                tok is NULL; s is "\0", len is undefined.
 *
 * Args:     s     - a tmp, modifiable ptr to string
 *           delim - characters that delimits tokens
 *           len   - RETURN: length of token; pass NULL if not wanted
 *
 * Returns:  ptr to next token, or NULL if there aren't any.
 */
char *
sre_strtok(char **s, char *delim, int *len)
{
  char *tok;
  char *stop;
  int   toklen;

  /* step over leading delimiters */
  tok = *s + strspn(*s, delim);
  if (*tok == '\0') return NULL;

  toklen = strcspn(tok, delim);
  stop   = tok + toklen;

  if (*stop != '\0')
    {                           /* token ended on a delimiter */
      *stop = '\0';
      *s    = stop + 1;
    }
  else
    {                           /* token ran to the end of the string */
      *s = stop;
    }

  if (len != NULL) *len = toklen;
  return tok;
}
-
-
-
/* Function: sre_strdup()
 * Date:     SRE, Wed May 19 17:57:28 1999 [St. Louis]
 *
 * Purpose:  A version of the common but non-ANSI strdup()
 *           function. Can pass len, if known, to save a
 *           strlen() call.
 *
 * Args:     s  - string to duplicate
 *           n  - length of string, if known; -1 if unknown.
 *
 * Returns:  allocated copy of string, '\0' terminated, in a
 *           buffer of n+1 chars; the caller frees it. At most n
 *           characters of s are copied, so a too-small n yields
 *           a truncated copy.
 *           NULL if s is NULL.
 */
char *
sre_strdup(char *s, int n)
{
  char *copy;

  if (s == NULL) return NULL;
  if (n < 0) n = strlen(s);
  copy = MallocOrDie (sizeof(char) * (n+1));

  /* The buffer is sized from n, so the copy must be bounded by n as
   * well; an unbounded strcpy() runs off its end if n < strlen(s).
   * strncpy() also stops reading s at its terminator, which keeps an
   * overstated n from reading past the end of s.
   */
  strncpy(copy, s, n);
  copy[n] = '\0';
  return copy;
}
-
-
/* Function: sre_strncpy()
 * Date:     SRE, Tue Jun 22 10:10:46 1999 [Sanger Centre]
 *
 * Purpose:  strncpy() followed by an unconditional s1[n] = '\0',
 *           so the result is always terminated.
 *
 * Args:     s1   - string to copy to (allocated n+1 or larger)
 *           s2   - string to copy from
 *           n    - number of chars to copy
 *
 * Returns:  s1, mirroring what strncpy() itself returns.
 */
char *
sre_strncpy(char *s1, char *s2, int n)
{
  char *dst;

  dst    = strncpy(s1, s2, n);  /* strncpy() hands back s1 */
  dst[n] = '\0';
  return dst;
}
-
-/* Function: IsBlankline()
- * Date: SRE, Fri Jun 18 14:36:08 1999 [St. Louis]
- *
- * Purpose: Returns TRUE if string consists solely of whitespace.
- *
- * Args: s - string to check
- */
-int
-IsBlankline(char *s)
-{
- for (; *s != '\0'; s++)
- if (! isspace(*s)) return FALSE;
- return TRUE;
-}
-
-
-
#ifdef CUBS_WIN
/* A timing test for sre_strcat()
 * cc -O2 -g sre_string.c sre_ctype.c sqerror.c sre_math.c hsregex.c -lm
 * 15.200u - 5.360u = 9.84u if sre_strcat() with no length info passed
 * 13.660u - 5.360u = 8.30u if strcat(), with a single malloc().
 * 11.370u - 5.360u = 6.01u if sre_strcat() with length info passed.
 *
 * The code below is the "length info passed" variant. For the plain
 * strcat() baseline, start each string with
 *    s1 = malloc(sizeof(char) * (255*nappends+1));  s1[0] = '\0';
 * and replace the sre_strcat() call with strcat(s1,s2).
 */
int main(void)
{
  float  p[4]     = {0.25, 0.25, 0.25, 0.25};
  int    nappends = 100;        /* appends per string */
  int    nstrings = 1000;       /* strings to build   */
  char  *s1       = NULL;
  char  *s2;
  int    buflen;
  int    len;
  int    i;

  for (; nstrings > 0; nstrings--)
    {
      s1  = NULL;
      len = 0;
      for (i = 0; i < nappends; i++)
        {
          /* a random piece of 1..255 chars */
          buflen = CHOOSE(255) + 1;
          s2     = RandomSequence("ACGT", p, 4, buflen);

          len = sre_strcat(&s1, len, s2, buflen);
          if (len < 0) exit(1);
          free(s2);
        }
      free(s1);
    }
  exit(0);
}
#endif /*CUBS_WIN*/