RNAlib-2.1.2
utils.h File Reference

Various utility- and helper-functions used throughout the Vienna RNA package. More...

+ This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Macros

#define VRNA_INPUT_ERROR   1U
#define VRNA_INPUT_QUIT   2U
#define VRNA_INPUT_MISC   4U
#define VRNA_INPUT_FASTA_HEADER   8U
#define VRNA_INPUT_SEQUENCE   16U
#define VRNA_INPUT_CONSTRAINT   32U
#define VRNA_INPUT_NO_TRUNCATION   256U
#define VRNA_INPUT_NO_REST   512U
#define VRNA_INPUT_NO_SPAN   1024U
#define VRNA_INPUT_NOSKIP_BLANK_LINES   2048U
#define VRNA_INPUT_BLANK_LINE   4096U
#define VRNA_INPUT_NOSKIP_COMMENTS   128U
#define VRNA_INPUT_COMMENT   8192U
#define VRNA_CONSTRAINT_PIPE   1U
#define VRNA_CONSTRAINT_DOT   2U
#define VRNA_CONSTRAINT_X   4U
#define VRNA_CONSTRAINT_ANG_BRACK   8U
#define VRNA_CONSTRAINT_RND_BRACK   16U
#define VRNA_CONSTRAINT_MULTILINE   32U
#define VRNA_CONSTRAINT_NO_HEADER   64U
#define VRNA_CONSTRAINT_ALL   128U
#define VRNA_CONSTRAINT_G   256U
#define VRNA_OPTION_MULTILINE   32U
#define MIN2(A, B)   ((A) < (B) ? (A) : (B))
#define MAX2(A, B)   ((A) > (B) ? (A) : (B))
#define MIN3(A, B, C)   (MIN2( (MIN2((A),(B))) ,(C)))
#define MAX3(A, B, C)   (MAX2( (MAX2((A),(B))) ,(C)))
#define XSTR(s)   STR(s)
#define STR(s)   #s
#define FILENAME_MAX_LENGTH   80
 Maximum length of filenames that are generated by our programs.
#define FILENAME_ID_LENGTH   42
 Maximum length of id taken from fasta header for filename generation.

Functions

void * space (unsigned size)
 Allocate space safely.
void * xrealloc (void *p, unsigned size)
 Reallocate space safely.
void nrerror (const char message[])
 Die with an error message.
void warn_user (const char message[])
 Print a warning message.
void init_rand (void)
 Make random number seeds.
double urn (void)
 get a random number from [0..1]
int int_urn (int from, int to)
 Generates a pseudo random integer in a specified range.
char * time_stamp (void)
 Get a timestamp.
char * random_string (int l, const char symbols[])
 Create a random string using characters from a specified symbol set.
int hamming (const char *s1, const char *s2)
 Calculate hamming distance between two sequences.
int hamming_bound (const char *s1, const char *s2, int n)
 Calculate hamming distance between two sequences up to a specified length.
char * get_line (FILE *fp)
 Read a line of arbitrary length from a stream.
unsigned int get_input_line (char **string, unsigned int options)
unsigned int read_record (char **header, char **sequence, char ***rest, unsigned int options)
 Get a data record from stdin.
char * pack_structure (const char *struc)
 Pack secondary structure, 5:1 compression using base 3 encoding.
char * unpack_structure (const char *packed)
 Unpack secondary structure previously packed with pack_structure()
short * make_pair_table (const char *structure)
 Create a pair table of a secondary structure.
short * copy_pair_table (const short *pt)
 Get an exact copy of a pair table.
short * alimake_pair_table (const char *structure)
short * make_pair_table_snoop (const char *structure)
int * make_loop_index_pt (short *pt)
 Compute the "base pair" distance between two secondary structures s1 and s2.
void print_tty_input_seq (void)
 Print a line to stdout that asks for an input sequence.
void print_tty_input_seq_str (const char *s)
 Print a line with a user defined string and a ruler to stdout.
void print_tty_constraint_full (void)
 Print structure constraint characters to stdout (full constraint support)
void print_tty_constraint (unsigned int option)
 Print structure constraint characters to stdout. (constraint support is specified by option parameter)
void str_DNA2RNA (char *sequence)
 Convert a DNA input sequence to RNA alphabet.
void str_uppercase (char *sequence)
 Convert an input sequence to uppercase.
int * get_iindx (unsigned int length)
 Get an index mapper array (iindx) for accessing the energy matrices, e.g. in partition function related functions.
int * get_indx (unsigned int length)
 Get an index mapper array (indx) for accessing the energy matrices, e.g. in MFE related functions.
void constrain_ptypes (const char *constraint, unsigned int length, char *ptype, int *BP, int min_loop_size, unsigned int idx_type)
 Insert constraining pair types according to constraint structure string.

Variables

unsigned short xsubi [3]
 Current 48 bit random number.

Detailed Description

Various utility- and helper-functions used throughout the Vienna RNA package.


Macro Definition Documentation

#define VRNA_INPUT_ERROR   1U

Output flag of get_input_line(): "An ERROR has occurred, maybe EOF"

#define VRNA_INPUT_QUIT   2U

Output flag of get_input_line(): "the user requested quitting the program"

#define VRNA_INPUT_MISC   4U

Output flag of get_input_line(): "something was read"

#define VRNA_INPUT_FASTA_HEADER   8U

Input/Output flag of get_input_line():
if used as input option this tells get_input_line() that the data to be read should comply with the FASTA format

the function will return this flag if a fasta header was read

#define VRNA_INPUT_SEQUENCE   16U

Input flag for get_input_line():
Tell get_input_line() that we assume to read a nucleotide sequence

#define VRNA_INPUT_CONSTRAINT   32U

Input flag for get_input_line():
Tell get_input_line() that we assume to read a structure constraint

#define VRNA_INPUT_NO_TRUNCATION   256U

Input switch for get_input_line(): "do not truncate the line by eliminating white spaces at end of line"

#define VRNA_INPUT_NO_REST   512U

Input switch for read_record(): "do not fill rest array"

#define VRNA_INPUT_NO_SPAN   1024U

Input switch for read_record(): "never allow data to span more than one line"

#define VRNA_INPUT_NOSKIP_BLANK_LINES   2048U

Input switch for read_record(): "do not skip empty lines"

#define VRNA_INPUT_BLANK_LINE   4096U

Output flag for read_record(): "read an empty line"

#define VRNA_INPUT_NOSKIP_COMMENTS   128U

Input switch for get_input_line(): "do not skip comment lines"

#define VRNA_INPUT_COMMENT   8192U

Output flag for read_record(): "read a comment"

#define VRNA_CONSTRAINT_PIPE   1U

pipe sign '|' switch for structure constraints (paired with another base)

#define VRNA_CONSTRAINT_DOT   2U

dot '.' switch for structure constraints (no constraint at all)

#define VRNA_CONSTRAINT_X   4U

'x' switch for structure constraint (base must not pair)

#define VRNA_CONSTRAINT_ANG_BRACK   8U

angle brackets '<', '>' switch for structure constraint (paired downstream/upstream)

#define VRNA_CONSTRAINT_RND_BRACK   16U

round brackets '(',')' switch for structure constraint (base i pairs base j)

#define VRNA_CONSTRAINT_MULTILINE   32U

constraint may span over several lines

#define VRNA_CONSTRAINT_NO_HEADER   64U

do not print the header information line

#define VRNA_CONSTRAINT_ALL   128U

placeholder for all constraining characters

#define VRNA_CONSTRAINT_G   256U

'+' switch for structure constraint (base is involved in a gquad)

#define VRNA_OPTION_MULTILINE   32U

Tell a function that an input is assumed to span several lines if used as input-option. A function might also return this state to indicate that it has read data from multiple lines.

See also:
extract_record_rest_structure(), read_record(), getConstraint()
#define MIN2(A, B)   ((A) < (B) ? (A) : (B))

Get the minimum of two comparable values

#define MAX2(A, B)   ((A) > (B) ? (A) : (B))

Get the maximum of two comparable values

#define MIN3(A, B, C)   (MIN2( (MIN2((A),(B))) ,(C)))

Get the minimum of three comparable values

#define MAX3(A, B, C)   (MAX2( (MAX2((A),(B))) ,(C)))

Get the maximum of three comparable values

#define XSTR(s)   STR(s)

Stringify a macro after expansion

#define STR(s)   #s

Stringify a macro argument

#define FILENAME_MAX_LENGTH   80

Maximum length of filenames that are generated by our programs.

This definition should be used throughout the complete ViennaRNA package wherever a static array holding filenames of output files is declared.

#define FILENAME_ID_LENGTH   42

Maximum length of id taken from fasta header for filename generation.

this has to be smaller than FILENAME_MAX_LENGTH since in most cases, some suffix will be appended to the ID


Function Documentation

void* space ( unsigned  size)

Allocate space safely.

Parameters:
sizeThe size of the memory to be allocated in bytes
Returns:
A pointer to the allocated memory
void* xrealloc ( void *  p,
unsigned  size 
)

Reallocate space safely.

Parameters:
pA pointer to the memory region to be reallocated
sizeThe size of the memory to be allocated in bytes
Returns:
A pointer to the newly allocated memory
void nrerror ( const char  message[])

Die with an error message.

See also:
warn_user()
Parameters:
messageThe error message to be printed before exiting with 'FAILURE'
void warn_user ( const char  message[])

Print a warning message.

Print a warning message to stderr

Parameters:
messageThe warning message
double urn ( void  )

get a random number from [0..1]

Note:
Usually implemented by calling erand48().
Returns:
A random number in range [0..1]
int int_urn ( int  from,
int  to 
)

Generates a pseudo random integer in a specified range.

Parameters:
fromThe first number in range
toThe last number in range
Returns:
A pseudo random number in range [from, to]
char* time_stamp ( void  )

Get a timestamp.

Returns a string containing the current date in the format

Fri Mar 19 21:10:57 1993
Returns:
A string containing the timestamp
char* random_string ( int  l,
const char  symbols[] 
)

Create a random string using characters from a specified symbol set.

Parameters:
lThe length of the sequence
symbolsThe symbol set
Returns:
A random string of length 'l' containing characters from the symbolset
int hamming ( const char *  s1,
const char *  s2 
)

Calculate hamming distance between two sequences.

Calculate the number of positions in which the two sequences differ.

Parameters:
s1The first sequence
s2The second sequence
Returns:
The hamming distance between s1 and s2
int hamming_bound ( const char *  s1,
const char *  s2,
int  n 
)

Calculate hamming distance between two sequences up to a specified length.

This function is similar to hamming() but instead of comparing both sequences up to their actual length only the first 'n' characters are taken into account

Parameters:
s1The first sequence
s2The second sequence
Returns:
The hamming distance between s1 and s2
char* get_line ( FILE *  fp)

Read a line of arbitrary length from a stream.

Returns a pointer to the resulting string. The necessary memory is allocated and should be released using free() when the string is no longer needed.

Parameters:
fpA file pointer to the stream where the function should read from
Returns:
A pointer to the resulting string
unsigned int get_input_line ( char **  string,
unsigned int  options 
)

Retrieve a line from 'stdin' safely while skipping comment characters and other features. This function returns the type of input it has read if recognized. An option argument allows switching between different reading modes.
Currently available options are:
#VRNA_INPUT_NOPRINT_COMMENTS, VRNA_INPUT_NOSKIP_COMMENTS, #VRNA_INPUT_NOELIM_WS_SUFFIX

pass a collection of options as one value like this:

get_input_line(string, option_1 | option_2 | option_n) 

If the function recognizes the type of input, it will report it in the return value. It also reports if a user defined 'quit' command (@-sign on 'stdin') was given. Possible return values are:
VRNA_INPUT_FASTA_HEADER, VRNA_INPUT_ERROR, VRNA_INPUT_MISC, VRNA_INPUT_QUIT

Parameters:
stringA pointer to the character array that contains the line read
optionsA collection of options for switching the function's behavior
Returns:
A flag with information about what has been read
unsigned int read_record ( char **  header,
char **  sequence,
char ***  rest,
unsigned int  options 
)

Get a data record from stdin.

This function may be used to obtain complete datasets from stdin. A dataset is always
defined to contain at least a sequence. If data on stdin starts with a fasta header,
i.e. a line like
>some header info

then read_record() will assume that the sequence that follows the header may span over several lines. To disable this behavior and to assign a single line to the argument 'sequence' one can pass VRNA_INPUT_NO_SPAN in the 'options' argument. If no fasta header is read in the beginning of a data block, a sequence must not span over multiple lines!
Unless the options VRNA_INPUT_NOSKIP_COMMENTS or VRNA_INPUT_NOSKIP_BLANK_LINES are passed, a sequence may be interrupted by lines starting with a comment character or empty lines.
A sequence is regarded as completely read if it was either assumed to not span over multiple lines, a secondary structure or structure constraint follows the sequence on the next line or a new header marks the beginning of a new sequence...
All lines following the sequence (this includes comments) and not initiating a new dataset are available through the line-array 'rest'. Here one can usually find the structure constraint or other information belonging to the current dataset. Filling of 'rest' may be prevented by passing VRNA_INPUT_NO_REST to the options argument.

Note:
This function will exit any program with an error message if no sequence could be read!

The main purpose of this function is to be able to easily parse blocks of data from stdin in the header of a loop where all calculations for the appropriate data are done inside the loop. The loop may then be left on certain return values, e.g.:

char *id, *seq, **rest;
int  i;
while(!(read_record(&id, &seq, &rest, 0) & (VRNA_INPUT_ERROR | VRNA_INPUT_QUIT))){
  if(id) printf("%s\n", id);
  printf("%s\n", seq);
  if(rest)
    for(i=0;rest[i];i++)
      printf("%s\n", rest[i]);
}

In the example above, the while loop will be terminated when read_record() returns either an error or a user initiated quit request.
As long as data is read from stdin, the id is printed if it is available for the current block of data. The sequence will be printed in any case and if some more lines belong to the current block of data each line will be printed as well.

Note:
Do not forget to free the memory occupied by header, sequence and rest!
Parameters:
header    A pointer which will be set such that it points to the header of the record
sequence  A pointer which will be set such that it points to the sequence of the record
rest      A pointer which will be set such that it points to an array of lines which also belong to the record
options   Some options which may be passed to alter the behavior of the function, use 0 for no options
Returns:
A flag with information about what the function actually did read
char* pack_structure ( const char *  struc)

Pack secondary structure, 5:1 compression using base 3 encoding.

Returns a binary string encoding of the secondary structure using a 5:1 compression scheme. The string is NULL terminated and can therefore be used with standard string functions such as strcmp(). Useful for programs that need to keep many structures in memory.

Parameters:
strucThe secondary structure in dot-bracket notation
Returns:
The binary encoded structure
char* unpack_structure ( const char *  packed)

Unpack secondary structure previously packed with pack_structure()

Translate a compressed binary string produced by pack_structure() back into the familiar dot-bracket notation.

Parameters:
packedThe binary encoded packed secondary structure
Returns:
The unpacked secondary structure in dot-bracket notation
short* make_pair_table ( const char *  structure)

Create a pair table of a secondary structure.

Returns a newly allocated table, such that table[i]=j if (i.j) pair or 0 if i is unpaired, table[0] contains the length of the structure.

Parameters:
structureThe secondary structure in dot-bracket notation
Returns:
A pointer to the created pair_table
short* copy_pair_table ( const short *  pt)

Get an exact copy of a pair table.

Parameters:
ptThe pair table to be copied
Returns:
A pointer to the copy of 'pt'
short* alimake_pair_table ( const char *  structure)

Pair table for snoop align

short* make_pair_table_snoop ( const char *  structure)

returns a newly allocated table, such that: table[i]=j if (i.j) pair or 0 if i is unpaired, table[0] contains the length of the structure. The special pseudoknotted H/ACA-mRNA structure is taken into account.

int* make_loop_index_pt ( short *  pt)

Compute the "base pair" distance between two secondary structures s1 and s2.

The sequences should have the same length. dist = number of base pairs in one structure but not in the other; this is the same as the edit distance with open-pair and close-pair as the move set.

Parameters:
str1First structure in dot-bracket notation
str2Second structure in dot-bracket notation
Returns:
The base pair distance between str1 and str2
void print_tty_input_seq ( void  )

Print a line to stdout that asks for an input sequence.

There will also be a ruler (scale line) printed that helps orientation of the sequence positions

void print_tty_input_seq_str ( const char *  s)

Print a line with a user defined string and a ruler to stdout.

(usually this is used to ask for user input) There will also be a ruler (scale line) printed that helps orientation of the sequence positions

Parameters:
sA user defined string that will be printed to stdout
void print_tty_constraint ( unsigned int  option)

Print structure constraint characters to stdout. (constraint support is specified by option parameter)

Currently available options are:
VRNA_CONSTRAINT_PIPE (paired with another base)
VRNA_CONSTRAINT_DOT (no constraint at all)
VRNA_CONSTRAINT_X (base must not pair)
VRNA_CONSTRAINT_ANG_BRACK (paired downstream/upstream)
VRNA_CONSTRAINT_RND_BRACK (base i pairs base j)

pass a collection of options as one value like this:

print_tty_constraint(option_1 | option_2 | option_n) 
Parameters:
optionOption switch that tells which constraint help will be printed
void str_DNA2RNA ( char *  sequence)

Convert a DNA input sequence to RNA alphabet.

This function substitutes T and t with U and u, respectively

Parameters:
sequenceThe sequence to be converted
void str_uppercase ( char *  sequence)

Convert an input sequence to uppercase.

Parameters:
sequenceThe sequence to be converted
int* get_iindx ( unsigned int  length)

Get an index mapper array (iindx) for accessing the energy matrices, e.g. in partition function related functions.

Access of a position "(i,j)" is then accomplished by using

(i,j) ~ iindx[i]-j 

This function is necessary as most of the two-dimensional energy matrices are actually one-dimensional arrays throughout the ViennaRNAPackage

Consult the implemented code to find out about the mapping formula ;)

See also:
get_indx()
Parameters:
lengthThe length of the RNA sequence
Returns:
The mapper array
int* get_indx ( unsigned int  length)

Get an index mapper array (indx) for accessing the energy matrices, e.g. in MFE related functions.

Access of a position "(i,j)" is then accomplished by using

(i,j) ~ indx[j]+i 

This function is necessary as most of the two-dimensional energy matrices are actually one-dimensional arrays throughout the ViennaRNAPackage

Consult the implemented code to find out about the mapping formula ;)

See also:
get_iindx()
Parameters:
lengthThe length of the RNA sequence
Returns:
The mapper array
void constrain_ptypes ( const char *  constraint,
unsigned int  length,
char *  ptype,
int *  BP,
int  min_loop_size,
unsigned int  idx_type 
)

Insert constraining pair types according to constraint structure string.

See also:
get_indx(), get_iindx()
Parameters:
constraintThe structure constraint string
lengthThe actual length of the sequence (constraint may be shorter)
ptypeA pointer to the basepair type array
min_loop_sizeThe minimal loop size (usually TURN )
idx_typeDefine the access type for base pair type array (0 = indx, 1 = iindx)

Variable Documentation

unsigned short xsubi[3]

Current 48 bit random number.

This variable is used by urn(). Its elements should be set to some random number seeds before the first call to urn().

See also:
urn()