1 /*****************************************************************
2 * HMMER - Biological sequence analysis with profile HMMs
3 * Copyright (C) 1992-1999 Washington University School of Medicine
6 * This source code is distributed under the terms of the
7 * GNU General Public License. See the files COPYING and LICENSE
9 *****************************************************************/
13 * my library of extra string functions. Some for portability
16 * RCS $Id: sre_string.c,v 1.1.1.1 2005/03/22 08:34:25 cmzmasek Exp $
28 * Purpose: Implementation of the common (but non-ANSI) function
29 * strdup(). Robust against being passed a NULL pointer.
36 if (s == NULL) return NULL;
37 if ((new = (char *) malloc (strlen(s) +1)) == NULL) return NULL;
42 /* Function: StringChop()
43 * Date: SRE, Wed Oct 29 12:10:02 1997 [TWA 721]
45 * Purpose: Chop trailing whitespace off of a string.
52 i = strlen(s) - 1; /* set i at last char in string */
53 while (i >= 0 && isspace((int) s[i])) i--; /* i now at last non-whitespace char, or -1 */
58 Strinsert(char *s1, /* string to insert a char into */
59 char c, /* char to insert */
60 int pos) /* position in s1 to insert c at */
65 for (s = s1 + pos; c; s++)
67 /* swap current char for inserted one */
68 oldc = *s; /* pick up current */
69 *s = c; /* put down inserted one */
70 c = oldc; /* old becomes next to insert */
79 Strdelete(char *s1, /* string to delete a char from */
80 int pos) /* position of char to delete 0..n-1 */
84 for (s = s1 + pos; *s; s++)
93 for (; *s != '\0'; s++)
94 *s = sre_tolower((int) *s);
100 for (; *s != '\0'; s++)
101 *s = sre_toupper((int) *s);
106 sre_malloc(char *file, int line, size_t size)
110 SQD_DPRINTF3(("MALLOC: %d bytes (file %s line %d)\n", size, file, line));
111 if ((ptr = malloc (size)) == NULL)
112 Die("malloc of %ld bytes failed: file %s line %d", size, file, line);
117 sre_realloc(char *file, int line, void *p, size_t size)
121 if ((ptr = realloc(p, size)) == NULL)
122 Die("realloc of %ld bytes failed: file %s line %d", size, file, line);
128 /* Function: Free2DArray(), Free3DArray()
129 * Date: SRE, Tue Jun 1 14:47:14 1999 [St. Louis]
131 * Purpose: Convenience functions for free'ing 2D
132 * and 3D pointer arrays. Tolerates any of the
133 * pointers being NULL, to allow "sparse"
136 * Args: p - array to be freed
137 * dim1 - n for first dimension
138 * dim2 - n for second dimension
140 * e.g. a 2d array is indexed p[0..dim1-1][]
141 * a 3D array is indexed p[0..dim1-1][0..dim2-1][]
145 * Diagnostics: (void)
149 Free2DArray(void **p, int dim1)
154 for (i = 0; i < dim1; i++)
155 if (p[i] != NULL) free(p[i]);
160 Free3DArray(void ***p, int dim1, int dim2)
165 for (i = 0; i < dim1; i++)
167 for (j = 0; j < dim2; j++)
168 if (p[i][j] != NULL) free(p[i][j]);
176 /* Function: RandomSequence()
178 * Purpose: Generate an iid symbol sequence according
179 * to some alphabet, alphabet_size, probability
180 * distribution, and length. Return the
183 * Args: alphabet - e.g. "ACGT"
184 * p - probability distribution [0..n-1]
185 * n - number of symbols in alphabet
186 * len - length of generated sequence
188 * Return: ptr to random sequence, or NULL on failure.
191 RandomSequence(char *alphabet, float *p, int n, int len)
196 s = (char *) MallocOrDie (sizeof(char) * (len+1));
197 for (x = 0; x < len; x++)
198 s[x] = alphabet[FChoose(p,n)];
203 /* Function: sre_fgets()
204 * Date: SRE, Thu May 13 10:56:28 1999 [St. Louis]
206 * Purpose: Dynamic allocation version of fgets(),
207 * capable of reading unlimited line lengths.
209 * Args: buf - ptr to a string (may be reallocated)
210 * n - ptr to current allocated length of buf,
212 * fp - open file ptr for reading
214 * Before the first call to sre_fgets(),
215 * buf should be initialized to NULL and n to 0.
216 * They're a linked pair, so don't muck with the
217 * allocation of buf or the value of n while
218 * you're still doing sre_fgets() calls with them.
220 * Returns: ptr to the buffer on success.
221 * NULL on EOF (buf isn't to be used in this case)
222 * sre_fgets() *always* results in an allocation
225 * The reason to have it return a ptr to buf
226 * is that it makes wrapper macros easy; see
227 * MSAFileGetLine() for an example.
229 * Example: char *buf;
233 * fp = fopen("my_file", "r");
236 * while (sre_fgets(&buf, &n, fp) != NULL)
242 sre_fgets(char **buf, int *n, FILE *fp)
250 *buf = MallocOrDie(sizeof(char) * 128);
254 /* Simple case 1. We're sitting at EOF, or there's an error.
255 * fgets() returns NULL, so we return NULL.
257 if (fgets(*buf, *n, fp) == NULL) return NULL;
259 /* Simple case 2. fgets() got a string, and it reached EOF.
260 * Return success status, so the caller can use
261 * the last line; on the next call we'll
262 * return NULL for the EOF.
264 if (feof(fp)) return *buf;
266 /* Simple case 3. We got a complete string, with \n,
267 * and don't need to extend the buffer.
270 if ((*buf)[len-1] == '\n') return *buf;
272 /* The case we're waiting for. We have an incomplete string,
273 * and we have to extend the buffer one or more times. Each
274 * new read must overwrite the previous fgets()'s \0 (hence an
275 * offset of +(n-1) in the first step, rather than 128, and reads of 129, not 128).
280 *buf = ReallocOrDie(*buf, sizeof(char) * (*n));
282 if (fgets(s, 129, fp) == NULL) return *buf;
284 if (s[len-1] == '\n') return *buf;
290 /* Function: sre_strcat()
291 * Date: SRE, Thu May 13 09:36:32 1999 [St. Louis]
293 * Purpose: Dynamic memory version of strcat().
294 * Appends src to the string that dest points to,
295 * extending the allocation for dest if necessary.
297 * One timing experiment (100 successive appends of
298 * 1-255 char) shows sre_strcat() has about a 20%
299 * overhead relative to strcat(). However, if optional
300 * length info is passed, sre_strcat() is about 30%
301 * faster than strcat().
303 * Args: dest - ptr to string (char **), '\0' terminated
304 * ldest - length of dest, if known; or -1 if length unknown.
305 * src - string to append to dest, '\0' terminated
306 * lsrc - length of src, if known; or -1 if length unknown.
308 * dest may be NULL, in which case this is
309 * the equivalent of dest = Strdup(src).
311 * src may also be NULL, in which case
312 * dest is unmodified (but why would you want to pass
315 * if both dest and src are NULL, dest is
316 * unmodified; it stays NULL.
318 * the length parameters are optional. If a -1
319 * is passed, sre_strcat() will call strlen() to
320 * determine the length itself. Passing length
321 * info saves the strlen() calls and can speed things
322 * up if lots of successive appends need to be done.
324 * Returns: new length of dest (>=0 on success);
325 * dest is (probably) reallocated, and modified
326 * to a longer string, '\0' terminated.
329 sre_strcat(char **dest, int ldest, char *src, int lsrc)
333 if (ldest < 0) len1 = ((*dest == NULL) ? 0 : strlen(*dest));
336 if (lsrc < 0) len2 = (( src == NULL) ? 0 : strlen(src));
339 if (len2 == 0) return len1;
341 if (*dest == NULL) *dest = MallocOrDie(sizeof(char) * (len2+1));
342 else *dest = ReallocOrDie(*dest, sizeof(char) * (len1+len2+1));
344 memcpy((*dest)+len1, src, len2+1);
348 /* Function: sre_strtok()
349 * Date: SRE, Wed May 19 16:30:20 1999 [St. Louis]
351 * Purpose: Thread-safe version of strtok().
353 * Returns ptr to next token in a string: skips
354 * until it reaches a character that is not in the delim
355 * string, and sets beginning of token. Skips to
356 * next delim character (or '\0') to set the end; replaces that
357 * character with '\0'.
358 * If there's still more string left, sets s to point to next
359 * character after the '\0' that was written, so successive
360 * calls extract tokens in succession. If there was no string
361 * left, s points at the terminal '\0'.
363 * If no token is found, returns NULL.
365 * Also returns the length of the token, which
366 * may save us a strlen() call in some applications.
369 * *s can't be a constant string, since we write to it.
375 * char buf[50] = "This is a sentence.";
378 * tok = sre_strtok(&s, " ", &len);
379 * tok is "This"; s is "is a sentence."; len is 4.
380 * tok = sre_strtok(&s, " ", &len);
381 * tok is "is"; s is "a sentence."; len is 2.
382 * tok = sre_strtok(&s, " ", &len);
383 * tok is "a"; s is "sentence."; len is 1.
384 * tok = sre_strtok(&s, " ", &len);
385 * tok is "sentence."; s is "\0"; len is 9.
386 * tok = sre_strtok(&s, " ", &len);
387 * tok is NULL; s is "\0", len is undefined.
389 * Args: s - a tmp, modifiable ptr to string
390 * delim - characters that delimit tokens
391 * len - RETURN: length of token; pass NULL if not wanted
393 * Returns: ptr to next token, or NULL if there aren't any.
396 sre_strtok(char **s, char *delim, int *len)
402 begin += strspn(begin, delim);
403 if (! *begin) return NULL;
405 n = strcspn(begin, delim);
407 if (*end == '\0') { *s = end;}
413 if (len != NULL) *len = n;
419 /* Function: sre_strdup()
420 * Date: SRE, Wed May 19 17:57:28 1999 [St. Louis]
422 * Purpose: A version of the common but non-ANSI strdup()
423 * function. Can pass len, if known, to save a
426 * Args: s - string to duplicate
427 * n - length of string, if known; -1 if unknown.
429 * Returns: allocated copy of string.
433 sre_strdup(char *s, int n)
437 if (s == NULL) return NULL;
438 if (n < 0) n = strlen(s);
439 new = MallocOrDie (sizeof(char) * (n+1));
445 /* Function: sre_strncpy()
446 * Date: SRE, Tue Jun 22 10:10:46 1999 [Sanger Centre]
448 * Purpose: a strncpy() that makes sure it adds a trailing \0.
450 * Args: s1 - string to copy to (allocated n+1 or larger)
451 * s2 - string to copy from
452 * n - number of chars to copy
455 * Done only for consistency with strncpy(). Not clear
456 * why it's useful for a strncpy() to return s1.
459 sre_strncpy(char *s1, char *s2, int n)
466 /* Function: IsBlankline()
467 * Date: SRE, Fri Jun 18 14:36:08 1999 [St. Louis]
469 * Purpose: Returns TRUE if string consists solely of whitespace.
471 * Args: s - string to check
476 for (; *s != '\0'; s++)
477 if (! isspace(*s)) return FALSE;
484 /* A timing test for sre_strcat()
485 * cc -O2 -g sre_string.c sre_ctype.c sqerror.c sre_math.c hsregex.c -lm
486 * 15.200u - 5.360u = 9.84u if sre_strcat() with no length info passed
487 * 13.660u - 5.360u = 8.30u if strcat(), with a single malloc().
488 * 11.370u - 5.360u = 6.01u if sre_strcat() with length info passed.
492 float p[4] = {0.25, 0.25, 0.25, 0.25};
505 /* s1 = malloc(sizeof(char) * (255*nappends+1));
511 for (i = 0; i < nappends; i++)
513 buflen = CHOOSE(255) + 1;
514 s2 = RandomSequence("ACGT", p, 4, buflen);
517 if ((len = sre_strcat(&s1, len, s2, buflen)) < 0) exit(1);