X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=binaries%2Fsrc%2FViennaRNA%2FH%2Futils.h;fp=binaries%2Fsrc%2FViennaRNA%2FH%2Futils.h;h=68b66ce9a124edc6a91d7f9e662e080aa45c5463;hb=7522ace91fc0804a9719dbac9f68bc8154da3132;hp=0000000000000000000000000000000000000000;hpb=8116c0444fe98e8eb21bcdd8ded06e1429085823;p=jabaws.git diff --git a/binaries/src/ViennaRNA/H/utils.h b/binaries/src/ViennaRNA/H/utils.h new file mode 100644 index 0000000..68b66ce --- /dev/null +++ b/binaries/src/ViennaRNA/H/utils.h @@ -0,0 +1,615 @@ +#ifndef __VIENNA_RNA_PACKAGE_UTILS_H__ +#define __VIENNA_RNA_PACKAGE_UTILS_H__ + +/** + * \file utils.h + * \brief Various utility- and helper-functions used throughout the Vienna RNA package + */ + +/** + * Output flag of \ref get_input_line(): "An ERROR has occured, maybe EOF" + */ +#define VRNA_INPUT_ERROR 1U +/** + * Output flag of \ref get_input_line(): "the user requested quitting the program" + */ +#define VRNA_INPUT_QUIT 2U +/** + * Output flag of \ref get_input_line(): "something was read" + */ +#define VRNA_INPUT_MISC 4U + +/** Input/Output flag of \ref get_input_line():\n + * if used as input option this tells get_input_line() that the data to be read should comply + * with the FASTA format + * + * the function will return this flag if a fasta header was read + */ +#define VRNA_INPUT_FASTA_HEADER 8U + +/** Input flag for get_input_line():\n + * Tell get_input_line() that we assume to read a nucleotide sequence + * + */ +#define VRNA_INPUT_SEQUENCE 16U + +/** Input flag for get_input_line():\n + * Tell get_input_line() that we assume to read a structure constraint + * + */ +#define VRNA_INPUT_CONSTRAINT 32U + +/** + * Input switch for \ref get_input_line(): + * "do not trunkate the line by eliminating white spaces at end of line" + */ +#define VRNA_INPUT_NO_TRUNCATION 256U + +/** + * Input switch for read_record(): "do fill rest array" + */ +#define VRNA_INPUT_NO_REST 512U + +/** + * Input switch for read_record(): "never allow data to span more than one line" + */ +#define VRNA_INPUT_NO_SPAN 1024U + +/** + * Input switch for read_record(): "do not skip empty lines" + */ +#define VRNA_INPUT_NOSKIP_BLANK_LINES 2048U + +/** + * Output flag for read_record(): "read an empty line" + */ +#define VRNA_INPUT_BLANK_LINE 4096U + +/** + * Input switch for \ref get_input_line(): "do not skip comment lines" + */ +#define VRNA_INPUT_NOSKIP_COMMENTS 128U + +/** + * Output flag for read_record(): "read a comment" + */ +#define VRNA_INPUT_COMMENT 8192U + + + + +/** + * pipe sign '|' switch for structure constraints (paired with another base) + */ +#define VRNA_CONSTRAINT_PIPE 1U +/** + * dot '.' switch for structure constraints (no constraint at all) + */ +#define VRNA_CONSTRAINT_DOT 2U +/** + * 'x' switch for structure constraint (base must not pair) + */ +#define VRNA_CONSTRAINT_X 4U +/** + * angle brackets '<', '>' switch for structure constraint (paired downstream/upstream) + */ +#define VRNA_CONSTRAINT_ANG_BRACK 8U +/** + * round brackets '(',')' switch for structure constraint (base i pairs base j) + */ +#define VRNA_CONSTRAINT_RND_BRACK 16U +/** + * constraint may span over several lines + */ +#define VRNA_CONSTRAINT_MULTILINE 32U +/** + * do not print the header information line + */ +#define VRNA_CONSTRAINT_NO_HEADER 64U +/** + * placeholder for all constraining characters + */ +#define VRNA_CONSTRAINT_ALL 128U +/** + * '+' switch for structure constraint (base is involved in a gquad) + */ +#define VRNA_CONSTRAINT_G 256U + + + +/** + * Tell a function that an input is assumed to span several lines if used as input-option + * A function might also be returning this state telling that it has read data from + * multiple lines. + * + * \see extract_record_rest_structure(), read_record(), getConstraint() + * + */ +#define VRNA_OPTION_MULTILINE 32U + + +/** + * Get the minimum of two comparable values + */ +#define MIN2(A, B) ((A) < (B) ? (A) : (B)) +/** + * Get the maximum of two comparable values + */ +#define MAX2(A, B) ((A) > (B) ? (A) : (B)) +/** + * Get the minimum of three comparable values + */ +#define MIN3(A, B, C) (MIN2( (MIN2((A),(B))) ,(C))) +/** + * Get the maximum of three comparable values + */ +#define MAX3(A, B, C) (MAX2( (MAX2((A),(B))) ,(C))) + + +/** + * Stringify a macro after expansion + */ +#define XSTR(s) STR(s) +/** + * Stringify a macro argument + */ +#define STR(s) #s + +#ifndef FILENAME_MAX_LENGTH +/** + * \brief Maximum length of filenames that are generated by our programs + * + * This definition should be used throughout the complete ViennaRNA package + * wherever a static array holding filenames of output files is declared. + */ +#define FILENAME_MAX_LENGTH 80 +/** + * \brief Maximum length of id taken from fasta header for filename generation + * + * this has to be smaller than FILENAME_MAX_LENGTH since in most cases, + * some suffix will be appended to the ID + */ +#define FILENAME_ID_LENGTH 42 +#endif + + +#ifdef HAVE_CONFIG_H +#include +#ifndef HAVE_STRDUP +char *strdup(const char *s); +#endif +#endif +#ifdef WITH_DMALLOC +/* use dmalloc library to check for memory management bugs */ +#include "dmalloc.h" +#define space(S) calloc(1,(S)) +#else + +/** + * \brief Allocate space safely + * + * \param size The size of the memory to be allocated in bytes + * \return A pointer to the allocated memory + */ +/*@only@*/ /*@notnull@*/ +void *space(unsigned size) /*@ensures MaxSet(result) == (size-1);@*/; + +/** + * \brief Reallocate space safely + * + * \param p A pointer to the memory region to be reallocated + * \param size The size of the memory to be allocated in bytes + * \return A pointer to the newly allocated memory + */ +/*@only@*/ /*@notnull@*/ +void *xrealloc(/*@null@*/ /*@only@*/ /*@out@*/ /*@returned@*/ void *p, + unsigned size) /*@modifies *p @*/ /*@ensures MaxSet(result) == (size-1) @*/; +#endif + +/** + * \brief Die with an error message + * + * \see warn_user() + * \param message The error message to be printed before exiting with 'FAILURE' + */ +/*@exits@*/ +void nrerror(const char message[]); + +/** + * \brief Print a warning message + * + * Print a warning message to \e stderr + * + * \param message The warning message + */ +void warn_user(const char message[]); + +/** + * \brief Make random number seeds + */ +void init_rand(void); + +/** + * \brief Current 48 bit random number + * + * This variable is used by urn(). These should be set to some + * random number seeds before the first call to urn(). + * + * \see urn() + */ +extern unsigned short xsubi[3]; + +/** + * \brief get a random number from [0..1] + * + * \note Usually implemented by calling \e erand48(). + * \return A random number in range [0..1] + */ +double urn(void); + +/** + * \brief Generates a pseudo random integer in a specified range + * + * \param from The first number in range + * \param to The last number in range + * \return A pseudo random number in range [from, to] + */ +int int_urn(int from, int to); + +void filecopy(FILE *from, FILE *to); /* inefficient `cp' */ + +/** + * \brief Get a timestamp + * + * Returns a string containing the current date in the format + * \verbatim Fri Mar 19 21:10:57 1993\endverbatim + * + * \return A string containing the timestamp + */ +/*@observer@*/ +char *time_stamp(void); + +/** + * \brief Create a random string using characters from a specified symbol set + * + * \param l The length of the sequence + * \param symbols The symbol set + * \return A random string of length 'l' containing characters from the symbolset + */ +/*@only@*/ /*@notnull@*/ +char *random_string(int l, const char symbols[]); + +/** + * \brief Calculate hamming distance between two sequences + * + * Calculate the number of positions in which + * \param s1 The first sequence + * \param s2 The second sequence + * \return The hamming distance between s1 and s2 + */ +int hamming(const char *s1, const char *s2); + +/** + * \brief Calculate hamming distance between two sequences up to a specified length + * + * This function is similar to hamming() but instead of comparing both sequences + * up to their actual length only the first 'n' characters are taken into account + * \param s1 The first sequence + * \param s2 The second sequence + * \return The hamming distance between s1 and s2 + */ +int hamming_bound(const char *s1, const char *s2, int n); + +/** + * \brief Read a line of arbitrary length from a stream + * + * Returns a pointer to the resulting string. The necessary memory is + * allocated and should be released using \e free() when the string is + * no longer needed. + * + * \param fp A file pointer to the stream where the function should read from + * \return A pointer to the resulting string + */ +/*@only@*/ /*@null@*/ +char *get_line(FILE *fp); + +int skip_comment_lines(char **line); + +/** + * Retrieve a line from 'stdin' savely while skipping comment characters and + * other features + * This function returns the type of input it has read if recognized. + * An option argument allows to switch between different reading modes.\n + * Currently available options are:\n + * #VRNA_INPUT_NOPRINT_COMMENTS, #VRNA_INPUT_NOSKIP_COMMENTS, #VRNA_INPUT_NOELIM_WS_SUFFIX + * + * pass a collection of options as one value like this: + * \verbatim get_input_line(string, option_1 | option_2 | option_n) \endverbatim + * + * If the function recognizes the type of input, it will report it in the return + * value. It also reports if a user defined 'quit' command (@-sign on 'stdin') + * was given. Possible return values are:\n + * #VRNA_INPUT_FASTA_HEADER, #VRNA_INPUT_ERROR, #VRNA_INPUT_MISC, #VRNA_INPUT_QUIT + * + * \param string A pointer to the character array that contains the line read + * \param options A collection of options for switching the functions behavior + * \return A flag with information about what has been read + */ +unsigned int get_input_line(char **string, + unsigned int options); + +unsigned int get_multi_input_line(char **string, + unsigned int options); + +/** + * \brief Get a data record from stdin + * + * This function may be used to obtain complete datasets from stdin. A dataset is always + * defined to contain at least a sequence. If data on stdin starts with a fasta header, + * i.e. a line like + * \verbatim >some header info \endverbatim + * then read_record() will assume that the sequence that follows the header may span + * over several lines. To disable this behavior and to assign a single line to the argument + * 'sequence' one can pass VRNA_INPUT_NO_SPAN in the 'options' argument. + * If no fasta header is read in the beginning of a data block, a sequence must not span over + * multiple lines!\n + * Unless the options #VRNA_INPUT_NOSKIP_COMMENTS or #VRNA_INPUT_NOSKIP_BLANK_LINES are passed, + * a sequence may be interrupted by lines starting with a comment character or empty lines.\n + * A sequence is regarded as completely read if it was either assumed to not span over multiple + * lines, a secondary structure or structure constraint follows the sequence on the next line + * or a new header marks the beginning of a new sequence...\n + * All lines following the sequence (this includes comments) and not initiating a new dataset are + * available through the line-array 'rest'. Here one can usually find the structure constraint or + * other information belonging to the current dataset. Filling of 'rest' may be prevented by + * passing #VRNA_INPUT_NO_REST to the options argument.\n + * + * \note This function will exit any program with an error message if no sequence could be read! + * + * The main purpose of this function is to be able to easily parse blocks of data from stdin + * in the header of a loop where all calculations for the appropriate data is done inside the + * loop. The loop may be then left on certain return values, e.g.: + * \verbatim +char *id, *seq, **rest; +int i; +while(!(read_record(&id, &seq, &rest, 0) & (VRNA_INPUT_ERROR | VRNA_INPUT_QUIT))){ + if(id) printf("%s\n", id); + printf("%s\n", seq); + if(rest) + for(i=0;rest[i];i++) + printf("%s\n", rest[i]); +} \endverbatim + * + * In the example above, the while loop will be terminated when read_record() returns either an + * error or a user initiated quit request.\n + * As long as data is read from stdin, the id is printed if it is available for the current block + * of data. The sequence will be printed in any case and if some more lines belong to the current + * block of data each line will be printed as well. + * + * \note Do not forget to free the memory occupied by header, sequence and rest! + * + * \param header A pointer which will be set such that it points to the header of the record + * \param sequence A pointer which will be set such that it points to the sequence of the record + * \param rest A pointer which will be set such that it points to an array of lines which also belong to the record + * \param options Some options which may be passed to alter the behavior of the function, use 0 for no options + * \return A flag with information about what the function actually did read + */ +unsigned int read_record( char **header, + char **sequence, + char ***rest, + unsigned int options); + + +/* \brief Extract a dot-bracket structure string from (multiline)character array + * + * This function extracts a dot-bracket structure string from the 'rest' array as + * returned by read_record() and returns it. All occurences of comments within the + * 'lines' array will be skipped as long as they do not break the structure string. + * If no structure could be read, this function returns NULL. + * + * \see read_record() + * + * \param lines The (multiline) character array to be parsed + * \param length The assumed length of the dot-bracket string (passing a value < 1 results in no length limit) + * \param option Some options which may be passed to alter the behavior of the function, use 0 for no options + * \return The dot-bracket string read from lines or NULL + */ +char *extract_record_rest_structure(const char **lines, + unsigned int length, + unsigned int option); + +/** + * \brief Pack secondary secondary structure, 5:1 compression using base 3 encoding + * + * Returns a binary string encoding of the secondary structure using + * a 5:1 compression scheme. The string is NULL terminated and can + * therefore be used with standard string functions such as strcmp(). + * Useful for programs that need to keep many structures in memory. + * + * \param struc The secondary structure in dot-bracket notation + * \return The binary encoded structure + */ +char *pack_structure(const char *struc); + +/** + * \brief Unpack secondary structure previously packed with pack_structure() + * + * Translate a compressed binary string produced by pack_structure() back into + * the familiar dot-bracket notation. + * + * \param packed The binary encoded packed secondary structure + * \return The unpacked secondary structure in dot-bracket notation + */ +char *unpack_structure(const char *packed); + +/** + * \brief Create a pair table of a secondary structure + * + * Returns a newly allocated table, such that table[i]=j if (i.j) pair + * or 0 if i is unpaired, table[0] contains the length of the structure. + * + * \param structure The secondary structure in dot-bracket notation + * \return A pointer to the created pair_table + */ +short *make_pair_table(const char *structure); + +short *make_pair_table_pk(const char *structure); + +/** + * \brief Get an exact copy of a pair table + * + * \param pt The pair table to be copied + * \return A pointer to the copy of 'pt' + */ +short *copy_pair_table(const short *pt); + +/** +***Pair table for snoop align +*** +*** +**/ +short *alimake_pair_table(const char *structure); + +/** +*** returns a newly allocated table, such that: table[i]=j if (i.j) pair or +*** 0 if i is unpaired, table[0] contains the length of the structure. +*** The special pseudoknotted H/ACA-mRNA structure is taken into account. +**/ +short *make_pair_table_snoop(const char *structure); + +/** + * \brief Compute the "base pair" distance between two secondary structures s1 and s2. + * + * The sequences should have the same length. + * dist = number of base pairs in one structure but not in the other + * same as edit distance with open-pair close-pair as move-set + * + * \param str1 First structure in dot-bracket notation + * \param str2 Second structure in dot-bracket notation + * \return The base pair distance between str1 and str2 + */ + +int *make_loop_index_pt(short *pt); + + +int bp_distance(const char *str1, + const char *str2); + +/** + * \brief Print a line to \e stdout that asks for an input sequence + * + * There will also be a ruler (scale line) printed that helps orientation of the sequence positions + */ +void print_tty_input_seq(void); + +/** + * \brief Print a line with a user defined string and a ruler to stdout. + * + * (usually this is used to ask for user input) + * There will also be a ruler (scale line) printed that helps orientation of the sequence positions + * + * \param s A user defined string that will be printed to stdout + */ +void print_tty_input_seq_str(const char *s); + +/** + * \brief Print structure constraint characters to stdout + * (full constraint support) + * + */ +void print_tty_constraint_full(void); + +/** + * \brief Print structure constraint characters to stdout. + * (constraint support is specified by option parameter) + * + * Currently available options are:\n + * #VRNA_CONSTRAINT_PIPE (paired with another base)\n + * #VRNA_CONSTRAINT_DOT (no constraint at all)\n + * #VRNA_CONSTRAINT_X (base must not pair)\n + * #VRNA_CONSTRAINT_ANG_BRACK (paired downstream/upstream)\n + * #VRNA_CONSTRAINT_RND_BRACK (base i pairs base j)\n + * + * pass a collection of options as one value like this: + * \verbatim print_tty_constraint(option_1 | option_2 | option_n) \endverbatim + * + * \param option Option switch that tells which constraint help will be printed + */ +void print_tty_constraint(unsigned int option); + +/** + * \brief Convert a DNA input sequence to RNA alphabet + * + * This function substitudes T and t with U and u, respectively + * + * \param sequence The sequence to be converted + */ +void str_DNA2RNA(char *sequence); + +/** + * \brief Convert an input sequence to uppercase + * + * \param sequence The sequence to be converted + */ +void str_uppercase(char *sequence); + +/** + * \brief Get an index mapper array (iindx) for accessing the energy matrices, e.g. in partition function related functions. + * + * Access of a position "(i,j)" is then accomplished by using \verbatim (i,j) ~ iindx[i]-j \endverbatim + * This function is necessary as most of the two-dimensional energy matrices are actually one-dimensional arrays throughout + * the ViennaRNAPackage + * + * Consult the implemented code to find out about the mapping formula ;) + * + * \see get_indx() + * \param length The length of the RNA sequence + * \return The mapper array + */ +int *get_iindx(unsigned int length); + +/** + * \brief Get an index mapper array (indx) for accessing the energy matrices, e.g. in MFE related functions. + * + * Access of a position "(i,j)" is then accomplished by using \verbatim (i,j) ~ indx[j]+i \endverbatim + * This function is necessary as most of the two-dimensional energy matrices are actually one-dimensional arrays throughout + * the ViennaRNAPackage + * + * Consult the implemented code to find out about the mapping formula ;) + * + * \see get_iindx() + * \param length The length of the RNA sequence + * \return The mapper array + * + */ +int *get_indx(unsigned int length); + +void getConstraint( char **cstruc, + const char **lines, + unsigned int option); + +/** + * \brief Insert constraining pair types according to constraint structure string + * + * \see get_indx(), get_iindx() + * + * \param constraint The structure constraint string + * \param length The actual length of the sequence (constraint may be shorter) + * \param ptype A pointer to the basepair type array + * \param min_loop_size The minimal loop size (usually \ref TURN ) + * \param idx_type Define the access type for base pair type array (0 = indx, 1 = iindx) + */ +void constrain_ptypes(const char *constraint, + unsigned int length, + char *ptype, + int *BP, + int min_loop_size, + unsigned int idx_type); + +unsigned int *make_referenceBP_array(short *reference_pt, + unsigned int turn); + +unsigned int *compute_BPdifferences( short *pt1, + short *pt2, + unsigned int turn); + +#endif