From: pvtroshin Date: Thu, 16 Jun 2011 13:10:17 +0000 (+0000) Subject: Modified IUPred to read multiple fasta as input X-Git-Url: http://source.jalview.org/gitweb/?a=commitdiff_plain;h=d2cacb3e9ad611207bc2e6b1dd61f2a6d0d90dc3;p=jabaws.git Modified IUPred to read multiple fasta as input git-svn-id: link to svn.lifesci.dundee.ac.uk/svn/barton/ptroshin/JABA2@4271 e3abac25-378b-4346-85de-24260fe3988d --- diff --git a/binaries/src/iupred/Makefile b/binaries/src/iupred/Makefile index aef5c84..0da3282 100644 --- a/binaries/src/iupred/Makefile +++ b/binaries/src/iupred/Makefile @@ -1,8 +1,8 @@ -CFLAGS = -O3 -funroll-loops -Winline -LDLIBS = -lm +CFLAGS = -O3 -funroll-loops -Winline -I./ +LDLIBS = -lm -OBJ = .o -EXE = +OBJ = *.o +EXE = iupred RM = rm -f CP = cp @@ -22,4 +22,7 @@ $(CPPOBJ): %.o: %.c iupred: $(CPPOBJ) $(LD) -o iupred $(CPPOBJ) $(LDLIBS) - strip iupred + strip $(EXE) +clean: + $(RM) $(CPPOBJ) + $(RM) $(EXE) diff --git a/binaries/src/iupred/P53_HUMAN.seq b/binaries/src/iupred/P53_HUMAN.seq new file mode 100644 index 0000000..60cd7a7 --- /dev/null +++ b/binaries/src/iupred/P53_HUMAN.seq @@ -0,0 +1,8 @@ +>P53_HUMAN +MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGP +DEAPRMPEAAPPVAPAPAAPTPAAPAPAPSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAK +SVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHE +RCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNS +SCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELP +PGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPG +GSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD diff --git a/binaries/src/iupred/getquery.c b/binaries/src/iupred/getquery.c new file mode 100644 index 0000000..79e9bc5 --- /dev/null +++ b/binaries/src/iupred/getquery.c @@ -0,0 +1,120 @@ +#include +#include +#include +#include + +#include "getquery.h" +#include "gjutil.h" + +/* get the next sequence entry from a fasta format file and + return a pointer to a structure containing this information. + + Format expected of the input file is: + + >IDENT Title here + ONE LETTER CODE SEQUENCE ON SEVERAL LINES + LIKE THIS + >NEXTIDENT Next title etc. + + The routine reads lines from the file until it finds one that starts + in '>'. It then reads this line as an ID, title line before reading + all alphabetic characters that follow as the amino acid sequence. + The sequence is terminated by the next '>' or End of File. + + This means that the must ONLY contain sequences. PIR format permits + more flexibility in the input file. + + NOTE: This routine assumes that no line will be longer than the + p->MAX_BUFFER_LEN. Any program that calls this routine should + have p->MAX_BUFFER_LEN set suitably big. This can be done by + pre-checking the database file for line lengths. + + Author: G. J. Barton (October 1995) + +*/ + +SEQS *gseq_fasta(FILE *seqfile) +{ + + char *buff,*tbuff; + SEQS *ret_val; + int MAX_BUFFER_LEN = 10000; + int MAX_SEQ_LEN = 10000; + char *ident = NULL; + char *title = NULL; + + GJ_S_COUNT j; + int c; + STD_FILES; + + buff = (char *) GJmalloc(sizeof(char) * MAX_BUFFER_LEN); + tbuff = buff; + + while((buff = fgets(buff,MAX_BUFFER_LEN,seqfile)) != NULL) { + if(buff[0] == '>') { + ret_val = (SEQS *) GJmalloc(sizeof(SEQS)); + ident = strtok(&buff[1]," "); + if(ident != NULL) { + ident=GJremovechar2(ident,'\n'); + ret_val->id = GJstrdup(ident); + ret_val->ilen = strlen(ident); + }else { + GJerror("Something strange with sequence identifier in fasta file"); + fprintf(std_err,"Line:%s\n",buff); + return NULL; + } + title = strtok(NULL,"\n"); + if(title != NULL) { + ret_val->title = GJstrdup(title); + ret_val->tlen = strlen(title); + }else { + /* GJerror("Something strange with sequence title in fasta file");*/ + /* fprintf(std_err,"Line:%s\n",buff);*/ + /*if(p->VERBOSE > 10)fprintf(std_err,"Title missing in FASTA file - Inserting dummy: %s\n",buff);*/ + ret_val->title = GJstrdup("-"); + ret_val->tlen = strlen(ret_val->title); + /* return NULL;*/ + } + ret_val->seq = (char *) GJmalloc(sizeof(char) * MAX_SEQ_LEN); + ret_val->seq[0] = ' '; + j = 0; + for(;;) { + c = fgetc(seqfile); + if(c == EOF || c == '>') { + ungetc(c,seqfile); + ret_val->seq[j] = '\0'; + ret_val->slen = j; + ret_val->seq = (char *) GJrealloc(ret_val->seq,sizeof(char) * ret_val->slen); + GJfree(buff); + return ret_val; + }else if(isalpha(c)) { + if(j == (MAX_SEQ_LEN - 3)){ + fprintf(std_err,"Sequence too long: %s (> %d residues): Increase MAX_SEQ_LEN\n", + ret_val->id,j); + exit(-1); + } + ret_val->seq[j++] = toupper(c); + } + } + } + } + GJfree(tbuff); + return(NULL); +} + +/* +int main(int argc, char **argv) +{ + FILE *fasta; + SEQS *seq; + fasta = fopen("/homes/pvtroshin/large.fasta", "r"); + + do{ + seq = gseq_fasta(fasta); + if(seq!=NULL) printf("Seq: %s\n",seq->id ); + } while(seq != NULL); + + fclose(fasta); + return 0; +} +*/ diff --git a/binaries/src/iupred/getquery.h b/binaries/src/iupred/getquery.h new file mode 100644 index 0000000..7668552 --- /dev/null +++ b/binaries/src/iupred/getquery.h @@ -0,0 +1,20 @@ +#include +#include +#include +#include + +#include "gjutil.h" + +/* Standard structure for storing protein sequence data */ + +typedef struct seqdat { /* all lengths include char terminator and [0] */ + char *id; /* identifier */ + int ilen; + char *title;/* title */ + int tlen; + int slen; /* length of sequence*/ + char *seq; /* sequence */ +}SEQS; + + +SEQS *gseq_fasta(FILE *seqfile); diff --git a/binaries/src/iupred/gjutil.c b/binaries/src/iupred/gjutil.c new file mode 100644 index 0000000..181b320 --- /dev/null +++ b/binaries/src/iupred/gjutil.c @@ -0,0 +1,1201 @@ + +/**************************************************************************** + +gjutil.c: Various utility routines - error checking malloc and +free, string functions etc... + +Copyright: Geoffrey J. Barton (1992, 1993, 1995, 1997) +email: geoff@ebi.ac.uk + +This software is made available for educational and non-commercial research +purposes. + +For commercial use, a commercial licence is required - contact the author +at the above address for details. + + +******************************************************************************/ +#include +#include +#include +#include +#include +#include +#include + +#include + +/* define pointers for standard streams to allow redefinition to files */ + +FILE *std_err; +FILE *std_in; +FILE *std_out; + +/* clock externs */ +clock_t start_time, end_time,initial_time,final_time; + +void *GJmalloc(size_t size) +/* malloc with simple error check */ +/* G. J. Barton 1992 */ +{ + void *ptr; + ptr = (void *) malloc(size); + if(ptr == NULL){ + GJerror("malloc error"); + exit(0); + } + return ptr; +} + +void *GJrealloc(void *ptr,size_t size) +/* realloc with error check */ +/* G. J. Barton 1992 */ +{ + ptr = (void *) realloc(ptr,size); + if(ptr == NULL){ + GJerror("realloc error"); + exit(0); + } + return ptr; +} +void *GJmallocNQ(size_t size) +/* as for GJmalloc, but returns NULL on error*/ +/* G. J. Barton 1992 */ +{ + void *ptr; + ptr = (void *) malloc(size); + if(ptr == NULL){ + GJerror("malloc error"); + return NULL; + } + return ptr; +} + +void *GJreallocNQ(void *ptr,size_t size) +/* as for GJrealloc with error check but returns NULL on error*/ +/* G. J. Barton 1992 */ +{ + ptr = (void *) realloc(ptr,size); + if(ptr == NULL){ + GJerror("realloc error"); + return NULL; + } + return ptr; +} +void GJfree(void *ptr) +/* free with error check */ +/* G. J. Barton 1992 */ +{ + if(ptr == NULL){ + GJerror("Attempt to free NULL pointer"); + exit(0); + } + free(ptr); +} + +void GJerror(const char *prefix) +/* writes error message contained in prefix and contents of errno + to std_err. +*/ +/* G. J. Barton 1992 */ +{ + if(prefix != NULL){ + if(*prefix != '\0'){ + fprintf(std_err,"%s: ",prefix); + } + } + fprintf(std_err,"%s\n",strerror(errno)); +} + +/* +error: calls GJerror +*/ +void error(const char *str,int flag) +{ + GJerror(str); + if(flag)exit(0); +} + + +char *GJstoupper(const char *s) +/* return a copy of s in upper case */ +/* G. J. Barton 1992 */ +{ + char *temp; + int i; + temp = GJstrdup(s); + i=0; + while(temp[i] != '\0'){ + temp[i] = toupper(temp[i]); + ++i; + } + return temp; +} +char *GJstolower(const char *s) +/* return a copy of s in lower case */ +/* G. J. Barton 1992 */ +{ + char *temp; + int i; + temp = GJstrdup(s); + i=0; + while(temp[i] != '\0'){ + temp[i] = tolower(temp[i]); + ++i; + } + return temp; +} +char *GJstoup(char *s) +/* return s in upper case */ +/* G. J. Barton 1992 */ +{ + int i; + i=0; + while(s[i] != '\0'){ + s[i] = toupper(s[i]); + ++i; + } + return s; +} +char *GJstolo(char *s) +/* return s in lower case */ +/* G. J. Barton 1992 */ +{ + int i; + i=0; + while(s[i] != '\0'){ + s[i] = tolower(s[i]); + ++i; + } + return s; +} + +char *GJstrdup(const char *s) +/* returns a pointer to a copy of string s */ +/* G. J. Barton 1992 */ + +{ + char *temp; + temp = (char *) GJmalloc(sizeof(char) * (strlen(s)+1)); + temp = strcpy(temp,s); + return temp; +} + +char *GJstrrename(char *old,const char *new) +/* takes old which is a pointer to a string, then replaces the contents + of the string with new, reallocating to the correct size +*/ +{ + int nlen; + nlen = strlen(new); + old = (char *) GJrealloc(old,sizeof(char) * (nlen + 1)); + old = strcpy(old,new); + return old; +} + + + +FILE *GJfopen(const char *fname,const char *type,int action) +/* a file open function with error checking. The third argument +is set to 0 if we want a failed open to return, or 1 if we +want a failed open to exit the program. +*/ +/* G. J. Barton 1992 */ +/* modified July 1995 - error message only printed if action is 1 */ +{ + FILE *ret_val; + ret_val = fopen(fname,type); + if(ret_val == NULL){ + /* GJerror(strcat("Cannot Open File: ",fname));*/ + if(action == 1){ + GJerror(strcat("Cannot Open File: ",fname)); + exit(1); + } + } + return ret_val; +} + +int GJfclose(FILE *fp,int action) +/* a file close function with error checking. The second argument +is set to 0 if we want a failed close to return, or 1 if we +want a failed close to exit the program. +*/ +/* G. J. Barton 1992 */ +{ + int ret_val; + ret_val = fclose(fp); + if(ret_val != 0){ + if(action == 1){ + GJerror("Error closing File"); + exit(1); + } + } + return ret_val; +} + + +GJFILE *GJfilemake(const char *name,const char *type,int action) +/* If action = 1 then +Tries to open the file with the given name. If successful returns +a pointer to a struct file structure with the name and filehandle. If +the open fails, or action= 0 then returns a struct file structure +with the name and a NULL filehandle */ +/* G. J. Barton 1995 */ +{ + GJFILE *ret_val; + ret_val = (GJFILE *) GJmalloc(sizeof(GJFILE)); + ret_val->name = GJstrdup(name); + if(action == 1) { + ret_val->handle = GJfopen(ret_val->name,type,0); + }else if(action == 0){ + ret_val->handle = NULL; + } + return ret_val; +} + +GJFILE *GJfilerename(GJFILE *ret_val, const char *name) +/* When passed the fval structure - renames the name part of the +file structure to name, if the handle is non null it tries to close +the file, then sets the file handle to NULL. */ +/* G. J. Barton 1995 */ +{ + if(ret_val->name != NULL) { + GJfree(ret_val->name); + ret_val->name = GJstrdup(name); + } + if(ret_val->handle != NULL) { + GJfclose(ret_val->handle,0); + ret_val->handle = NULL; + } + return ret_val; +} + +GJFILE *GJfileclose(GJFILE *ret_val,int action) +/* Closes a file named in the struct file structure returns the struct + file structure */ + +/* G. J. Barton July 1995 */ +{ + STD_FILES; + + if(GJfclose(ret_val->handle,0) == 0){ + return ret_val; + }else { + if(action == 1){ + GJerror("Error closing File"); + fprintf(std_err,"%s\n",ret_val->name); + exit(-1); + } + } + return ret_val; +} + +GJFILE *GJfileopen(GJFILE *ret_val,const char *type,int action) +/* Opens a file named in the struct file structure */ + +/* G. J. Barton October 1995 */ +{ + STD_FILES; + + ret_val->handle = GJfopen(ret_val->name,type,0); + if(ret_val->handle == NULL){ + if(action == 1){ + GJerror("Error opening File"); + fprintf(std_err,"%s\n",ret_val->name); + exit(-1); + } + } + return ret_val; +} + +GJFILE *GJfileclean(GJFILE *ret_val,int action) +/* Closes the file then sets the file pointer to NULL, then + frees the filename string */ + +/* G. J. Barton July 1995 */ +{ + if(GJfclose(ret_val->handle,0) == 0) { + ret_val->handle = NULL; + GJfree(ret_val->name); + return ret_val; + }else { + if(action == 1){ + GJerror("Error closing File"); + fprintf(std_err,"%s\n",ret_val->name); + exit(-1); + } + } + return ret_val; +} + +void GJinitfile(void) +/* just set the standard streams */ +{ + std_err = stderr; + std_in = stdin; + std_out = stdout; +} + +char *GJfnonnull(char *string) +/* return pointer to first non null character in the string */ +/* this could cause problems if the string is not null terminated */ +{ + while(*string != '\0'){ + ++string; + } + return ++string; +} + +char *GJstrappend(char *string1, char *string2) +/* appends string2 to the end of string2. Any newline characters are removed +from string1, then the first character of string2 overwrites the null at the +end of string1. +string1 and string2 must have been allocated with malloc. +*/ +/* G. J. Barton July 1992 */ +{ + char *ret_val; + ret_val = GJremovechar(string1,'\n'); + ret_val = (char *) GJrealloc(ret_val, + sizeof(char) * (strlen(ret_val) + strlen(string2) + 1)); + ret_val = strcat(ret_val,string2); + return ret_val; +} + +char *GJremovechar(char *string,char c) +/* removes all instances of character c from string + returns a pointer to the reduced, null terminated string + 11/8/1996: couple of bugs found in this routine. + the length of the string returned was too short by 2 bytes. + This is a dodgy routine since string is freed. +*/ +/* G. J. Barton (July 1992) */ +{ + char *temp; + int j,i,nchar; + nchar = 0; + i=0; + while(string[i] != '\0'){ + if(string[i] == c){ + ++nchar; + } + ++i; + } + if(nchar == 0){ + return string; + }else{ + temp = (char *) GJmalloc(sizeof(char) * (strlen(string)-nchar + 1)); + j=0; + i=0; + while(string[i] != '\0'){ + if(string[i] != c){ + temp[j] = string[i]; + ++j; + } + ++i; + } + temp[j] = '\0'; + GJfree(string); + return temp; + } +} + +char *GJremovechar2(char *string,char c) +/* removes all instances of character c from string + returns a pointer to the reduced, null terminated string +*/ +/* G. J. Barton (July 1992) */ +{ + char *temp; + int i,k,len; + k=0; + len=strlen(string); + temp = (char *) GJmalloc(sizeof(char) * (len+1)); + for(i=0;i<(len+1);++i){ + if(string[i] != c){ + temp[k] = string[i]; + ++k; + } + } + for(i=0;i<(strlen(temp)+1);++i){ + string[i] = temp[i]; + } + GJfree(temp); + return string; +} + + +char *GJsubchar(char *string,char c2,char c1) +/* substitutes c1 for c2 in string +*/ +/* G. J. Barton (July 1992) */ +{ + int i; + + i=0; + while(string[i] != '\0'){ + if(string[i] == c1){ + string[i] = c2; + } + ++i; + } + return string; +} + +/* create a string and if fchar != NULL fill with characters */ +/* always set the len-1 character to '\0' */ + +char *GJstrcreate(size_t len,char *fchar) +{ + char *ret_val; + ret_val = (char *) GJmalloc(sizeof(char) * len); + --len; + ret_val[len] = '\0'; + if(fchar != NULL){ + while(len > -1){ + ret_val[len] = *fchar; + --len; + } + } + return ret_val; +} + +/* searches for string s2 in string s1 and returns pointer to first instance +of s2 in s1 or NULL if no instance found. s1 and s2 must be null terminated +*/ +char *GJstrlocate(char *s1, char *s2) +{ + int i=0; + int j=0; + int k; + if(strlen(s1) == 0 || strlen(s2) == 0) return NULL; + while(i < strlen(s1)){ + j=0; + k=i; + while(j < strlen(s2) && s1[k] == s2[j]){ + ++k; + ++j; + } + if(j == strlen(s2)) return &s1[i]; + ++i; + } + return NULL; +} +#include +#include + + +/* GJstrtok() + +This version of strtok places the work pointer at the location of the first +character in the next token, rather than just after the last character of the +current token. This is useful for extracting quoted strings +*/ + +char *GJstrtok(char *input_string,const char *token_list) +{ + static char *work; + char *return_ptr; + + if(input_string != NULL){ + /* first call */ + work = input_string; + } + + /* search for next non-token character */ + while(strchr(token_list,*work)!=NULL){ + ++work; + } + + if(*work == '\0'){ + /* if we've reached the end of string, then return NULL */ + return NULL; + }else{ + return_ptr = (char *) work; + while(strchr(token_list,*work) == NULL){ + if(*work == '\0'){ + /* end of the string */ + return return_ptr; + }else{ + ++work; + } + } + *work = '\0'; + ++work; + /* now increment work until we find the next non-delimiter character */ + while(strchr(token_list,*work) != NULL){ + if(*work == '\0'){ + break; + }else{ + ++work; + } + } + return return_ptr; + } +} +/************************************************************************** +return a pointer to space for a rectangular unsigned character array +Version 2.0 ANSI and uses GJmallocNQ +--------------------------------------------------------------------------*/ + +unsigned char **uchararr(int i,int j) +{ + unsigned char **temp; + int k, rowsiz; + + temp = (unsigned char **) GJmallocNQ(sizeof(unsigned char *) * i); + if(temp == NULL) return NULL; + + rowsiz = sizeof(unsigned char) * j; + + for (k = 0; k < i; ++k){ + temp[k] = (unsigned char *) GJmallocNQ(rowsiz); + if(temp[k] == NULL) return NULL; + } + return temp; +} + +/************************************************************************** +free up space pointed to by rectangular unsigned character array +-------------------------------------------------------------------------*/ +void ucharfree(unsigned char **array,int i) + +{ + int k; + + for (k = 0; k < i; ++k){ + GJfree((char *) array[k]); + } + GJfree((char *) array); + +} +/************************************************************************** +return a pointer to space for a rectangular double array +--------------------------------------------------------------------------*/ + +double **GJdarr(int i,int j) +{ + double **temp; + int k, rowsiz; + + temp = (double **) GJmallocNQ(sizeof(double *) * i); + if(temp == NULL) return NULL; + + rowsiz = sizeof(double) * j; + + for (k = 0; k < i; ++k){ + temp[k] = (double *) GJmallocNQ(rowsiz); + if(temp[k] == NULL) return NULL; + } + return temp; +} +void GJdarrfree(double **array,int i) + +{ + int k; + + for (k = 0; k < i; ++k){ + GJfree((char *) array[k]); + } + GJfree((char *) array); + +} + +/************************************************************************** +return a pointer to space for a rectangular signed character array +Version 2.0 ANSI +--------------------------------------------------------------------------*/ +signed char **chararr(int i,int j) + +{ + signed char **temp; + int k, rowsiz; + + temp = (signed char **) GJmallocNQ(sizeof(char *) * i); + + if(temp == NULL) return NULL; + + rowsiz = sizeof(char) * j; + + for (k = 0; k < i; ++k){ + temp[k] = (signed char *) GJmallocNQ(rowsiz); + if(temp[k] == NULL) return NULL; + } + return temp; +} + + +/* mcheck - check a call to malloc - if the call has failed, print the +error message and exit the program */ +/* ANSI Version - also uses GJerror routine and ptr is declared void*/ + +void mcheck(void *ptr,char *msg) + +{ + if(ptr == NULL){ + GJerror("malloc/realloc error"); + exit(0); + } +} + +/* set a string to blanks and add terminating nul */ +char *GJstrblank(char *string,int len) + +{ + --len; + string[len] = '\0'; + --len; + while(len > -1){ + string[len] = ' '; + --len; + } + return string; +} + +/* Initialise an unsigned char array */ +void GJUCinit(unsigned char **array,int i,int j,unsigned char val) +{ + int k,l; + + for(k=0;k0){ + token[i] = '\0'; + return token; + } + } +/* GJerror("End of File Encountered");*/ + GJfree(token); + return NULL; +} + +struct tokens *GJgettokens(const char *delims, char *buff) +/* This splits a buffer into tokens at each position defined by delims.*/ +/* The structure returned contains the number of tokens and the */ +/* tokens themselves as a char ** array */ +{ + char *token; + struct tokens *tok; + + token = strtok(buff,delims); + if(token == NULL) return NULL; + + tok = (struct tokens *) GJmalloc(sizeof(struct tokens)); + tok->ntok = 0; + tok->tok = (char **) GJmalloc(sizeof(char *)); + tok->tok[0] = GJstrdup(token); + ++tok->ntok; + while((token = strtok(NULL,delims)) != NULL) { + tok->tok = (char **) GJrealloc(tok->tok,sizeof(char *) * (tok->ntok+1)); + tok->tok[tok->ntok] = GJstrdup(token); + ++tok->ntok; + } + + return tok; +} + +void GJfreetokens(struct tokens *tok) +/* frees a tokens structure */ + +{ + int i; + for(i=0;intok;++i) { + GJfree(tok->tok[i]); + } + GJfree(tok->tok); + GJfree(tok); + tok = NULL; /* add this to avoid odd pointer 27/6/1997*/ +} + +char * GJtoktostr(struct tokens *tok,char delim,int s, int e) + +/* + returns a string with the tokens between s and e inclusive written to + it separated by delim + the tok structure is unmodified. +*/ + +{ + int n, i, j,k; + char *ret_val; + + n = 0; + + + if(s < 0 || s >= tok->ntok) s = 0; + if(e < 0 || e >= tok->ntok) e = tok->ntok - 1; + + for(i=s;i<=e;++i){ + n += strlen(tok->tok[i]); + ++n; + } + + ret_val = (char *) GJmalloc(sizeof(char) * n); + j = 0; + for(i=s;i<=e;++i){ + for(k=0;ktok[i]);++k){ + ret_val[j] = tok->tok[i][k]; + ++j; + } + ret_val[j++] = delim; + } + ret_val[n-1] = '\0'; + return ret_val; +} + + +void GJindexx(int *arrin,int n,int *indx) + /* indexed heap sort - adapted from the NR routine indexx. + inarr is an integer array to sort, + indx is the returned index array + */ +{ + int l,j,ir,indxt,i; +/* SMJS + float q; +*/ + int q; + + for (j=1;j<=n;j++) indx[j]=j; + l=(n >> 1) + 1; + ir=n; + for (;;) { + if (l > 1) + q=arrin[(indxt=indx[--l])]; + else { + q=arrin[(indxt=indx[ir])]; + indx[ir]=indx[1]; + if (--ir == 1) { + indx[1]=indxt; + return; + } + } + i=l; + j=l << 1; + while (j <= ir) { + if (j < ir && arrin[indx[j]] < arrin[indx[j+1]]) j++; + if (q < arrin[indx[j]]) { + indx[i]=indx[j]; + j += (i=j); + } + else j=ir+1; + } + indx[i]=indxt; + } +} + +void GJindexxD(double *arrin,int n,int *indx) + /* indexed heap sort - adapted from the NR routine indexx. + arrin is a double array to sort, + indx is the returned index array + */ +{ + int l,j,ir,indxt,i; +/* + float q; +*/ + double q; + + for (j=1;j<=n;j++) indx[j]=j; + l=(n >> 1) + 1; + ir=n; + for (;;) { + if (l > 1) + q=arrin[(indxt=indx[--l])]; + else { + q=arrin[(indxt=indx[ir])]; + indx[ir]=indx[1]; + if (--ir == 1) { + indx[1]=indxt; + return; + } + } + i=l; + j=l << 1; + while (j <= ir) { + if (j < ir && arrin[indx[j]] < arrin[indx[j+1]]) j++; + if (q < arrin[indx[j]]) { + indx[i]=indx[j]; + j += (i=j); + } + else j=ir+1; + } + indx[i]=indxt; + } +} + +void GJindexxS1(char **arrin,int n,int *indx) + /*indexed sort of a character array - this uses qsort rather than the + heapsort in GJindexxS. indx runs from 0..(n-1) rather than 1..n as in + GJindexxS. + */ +{ + int i; + CWORK *ret; + + ret = (CWORK *) GJmalloc(sizeof(CWORK) * n); + + for(i=0;ival,b->val); +} + + +void GJindexxS(char **arrin,int n,int *indx) + /* indexed heap sort - adapted from the NR routine indexx. + arrin is a character string array to sort + indx is the returned index array + */ +{ + int l,j,ir,indxt,i; +/* + float q; +*/ + char *q; + + for (j=1;j<=n;j++) indx[j]=j; + l=(n >> 1) + 1; + ir=n; + for (;;) { + if (l > 1) + q=arrin[(indxt=indx[--l])]; + else { + q=arrin[(indxt=indx[ir])]; + indx[ir]=indx[1]; + if (--ir == 1) { + indx[1]=indxt; + return; + } + } + i=l; + j=l << 1; + while (j <= ir) { + if (j < ir && (strcmp(arrin[indx[j]],arrin[indx[j+1]]) < 0) ) j++; + if (strcmp(q,arrin[indx[j]])<0) { + indx[i]=indx[j]; + j += (i=j); + } + else j=ir+1; + } + indx[i]=indxt; + } +} + + + +void GJpline(FILE *fp,char c,int n) +/* print n copies of c to fp terminated by newline */ +{ + int i; + for(i=0;i n){ + for(i=0;i imax) imax = ivec[i]; + if(ivec[i] < imin) imin = ivec[i]; + } + + ret_val = (IRANGE *) GJmalloc(sizeof(IRANGE)); + ret_val->min = imin; + ret_val->max = imax; + + return ret_val; +} + +#define BLIM 5 +int GJbsearchINXS(char **cod, int n, char *query) + +/* binary search for query in table cod. If query is found, return index of query in cod.*/ +/* if it is not found, return -1 */ + +{ + int r; /* right limit */ + int l; /* left limit */ + int cv; /* centre value */ + + r = n-1; + l = 0; + + for(;;){ + if((r-l) > BLIM){ + cv = (r+l)/2; + if(strcmp(query,cod[cv]) == 0){ + return cv; + }else if(strcmp(query,cod[cv]) > 0){ + l = cv; + }else if(strcmp(query,cod[cv]) < 0){ + r = cv; + } + }else{ + for(cv=l;cv<(r+1);++cv){ + if(strcmp(query,cod[cv]) == 0){ + return cv; + } + } + return (int) -1; + } + } +} + +int GJbsearchINX_IS(char **cod, int *inx, int n, char *query) + +/* binary search for query in table cod. inx is the index into the table that specifies + the sorted order of the table cod. + If query is found, return index of query in inx that can be used to recover the value of + cod .*/ +/* if it is not found, return -1 */ + +{ + int r; /* right limit */ + int l; /* left limit */ + int cv; /* centre value */ + + r = n-1; + l = 0; + + for(;;){ + if((r-l) > BLIM){ + cv = (r+l)/2; + if(strcmp(query,cod[inx[cv]]) == 0){ + return cv; + }else if(strcmp(query,cod[inx[cv]]) > 0){ + l = cv; + }else if(strcmp(query,cod[inx[cv]]) < 0){ + r = cv; + } + }else{ + for(cv=l;cv<(r+1);++cv){ + if(strcmp(query,cod[inx[cv]]) == 0){ + return cv; + } + } + return (int) -1; + } + } +} + +/* + +$Id: gjutil.c,v 1.13 2002/08/09 12:30:31 geoff Exp $ + +$Log: gjutil.c,v $ +Revision 1.13 2002/08/09 12:30:31 geoff +Experiment with different weighting schemes. +Changes to build_profile to accommodate new schemes +pwf 3 Complex clustering to get sequence weights at each position +pwf 4 Dirichlet mixture alone +pwf 5 Dirichlet mixture + psiblast windowing + blosum weights +pwf 6 local blosum matrix calculation + HH sequence weights + blosum + +Also add wander_check opton +Also add option to turn off sw7 bug work around. +Add option to suppress multiple alignment output +Add gjnoc2 to distribution + +Revision 1.12 2000/12/21 17:25:44 geoff +Add the option to output the sequence fragments from the multiple alignment +output option in fasta or pir format. Unlike the block file output, these +fragments contain the complete sequence between the start and end points, including +any parts deleted in the alignment process. +Add appropriate commands to scanps_alias.dat, getpars and getcmd. + +Revision 1.11 2000/07/04 11:01:37 searle +Changes for MMX + +Revision 1.10 1999/11/17 21:06:47 geoff +Add setup_caleb and other changes to swpp2 and so on. + +Revision 1.9 1999/07/09 13:34:10 geoff +modified these as a test + +Revision 1.8 1998/08/11 15:38:50 geoff +Modified the copyright notice to reflect the new +ownership of this software. + +Revision 1.7 1997/06/29 00:43:57 gjb +Changes to add sysinfo calls and test of license reading routines + +Revision 1.6 1997/06/27 16:42:41 gjb +Add trlic.c test.lic and clean up gjutil.c + +Revision 1.5 1997/06/27 07:17:31 gjb +Added rlic.c linfo.h and +changes to gjutil.c to give better support for +token manipulation + +Revision 1.4 1997/05/12 11:10:53 gjb +Re-added gjutil.c and gjutil.h to repository +after deleting them + +Revision 1.2 1997/05/12 10:47:52 gjb +Modified CVS header and log position + +*/ + + + + diff --git a/binaries/src/iupred/gjutil.h b/binaries/src/iupred/gjutil.h new file mode 100644 index 0000000..0a206df --- /dev/null +++ b/binaries/src/iupred/gjutil.h @@ -0,0 +1,143 @@ +#ifndef GJ_UTIL_H +#define GJ_UTIL_H + +/* +$Id: gjutil.h,v 1.8 2002/08/09 12:30:31 geoff Exp $ +*/ + +#include + + +typedef int GJ_LIM; /* type for limits - e.g. max buffer length */ +typedef int GJ_PEN; /* type for gap penalties */ +typedef int GJ_FLG; /* Flag type - usually just 1 or 0 */ +typedef float GJ_FLOAT; /* Single precision floating point type */ +typedef double GJ_DBL; /* a double precision floating point type */ +typedef int GJ_S_COUNT; /* small counter */ +typedef long GJ_L_COUNT; /* long counter */ + +typedef struct { /* structure to hold a filename and */ + char *name; /* associated handle */ + FILE *handle; +} GJFILE; + +struct tokens { /* structure to hold tokens parsed from */ + int ntok; /* string with strtok */ + char **tok; +}; + +typedef struct { + int min; + int max; +} IRANGE; + +typedef struct { + char *val; + int i; +} CWORK; + +int Sworkcomp(const void *left, const void *right); +void GJindexxS(char **arrin,int n,int *indx); + +#define STD_FILES extern FILE *std_in,*std_out,*std_err + +/* utility.h function definitions */ + +void *GJmalloc(size_t); +void *GJrealloc(void *,size_t); +void *GJmallocNQ(size_t); +void *GJreallocNQ(void *,size_t); +void GJfree(void *); +void GJerror(const char *); +char *GJstrdup(const char *); +char *GJstoupper(const char *); +char *GJstolower(const char *); +char *GJstoup(char *); +char *GJstolo(char *); + +FILE *GJfopen(const char *, const char *,int); +int GJfclose(FILE *,int); +GJFILE *GJfilemake(const char *name,const char *type,int action); +GJFILE *GJfilerename(GJFILE *ret_val, const char *name); +GJFILE *GJfileclose(GJFILE *ret_val,int action); +GJFILE *GJfileopen(GJFILE *ret_val,const char *type,int action); +GJFILE *GJfileclean(GJFILE *fval,int action); +void GJinitfile(void); + +char *GJfnonnull(char *); +char *GJstrappend(char *,char *); +char *GJremovechar(char *,char); +char *GJremovechar2(char *string,char c); +char *GJstrcreate(size_t, char *); +char *GJstrlocate(char *,char *); +char *GJsubchar(char *,char,char); +char *GJstrtok(char *,const char *); +void error(const char *, int); +unsigned char **uchararr(int,int); +void ucharfree(unsigned char **array,int i); +double **GJdarr(int i,int j); +void GJdarrfree(double **array,int i); +signed char **chararr(int,int); +void GJCinit(signed char **,int ,int ,char ); +void mcheck(void *, char *); +char *GJstrblank(char *, int); +void GJUCinit(unsigned char **,int ,int ,unsigned char ); +char *GJcat(int N,...); +struct tokens *GJgettokens(const char *delims, char *buff); +void GJfreetokens(struct tokens *tok); +char * GJtoktostr(struct tokens *tok, char delim, int s, int e); +void GJ_start_clock(void); +void GJ_stop_clock(FILE *fp); +char *GJstrrename(char *old,const char *new); +void GJindexx(int *arrin,int n,int *indx); +void GJindexxD(double *arrin,int n,int *indx); +void GJindexxS(char **arrin,int n,int *indx); +int GJbsearchINXS(char **cod, int n, char *query); +int GJbsearchINX_IS(char **cod, int *inx, int n, char *query); + +void GJpline(FILE *fp,char c,int n); +char *GJlocaltime(void); +void GJpstring(FILE *fp,char *s,int n); + +IRANGE *irange(int *ivec, int n); + +/* +$Log: gjutil.h,v $ +Revision 1.8 2002/08/09 12:30:31 geoff +Experiment with different weighting schemes. +Changes to build_profile to accommodate new schemes +pwf 3 Complex clustering to get sequence weights at each position +pwf 4 Dirichlet mixture alone +pwf 5 Dirichlet mixture + psiblast windowing + blosum weights +pwf 6 local blosum matrix calculation + HH sequence weights + blosum + +Also add wander_check opton +Also add option to turn off sw7 bug work around. +Add option to suppress multiple alignment output +Add gjnoc2 to distribution + +Revision 1.7 2000/12/21 17:25:44 geoff +Add the option to output the sequence fragments from the multiple alignment +output option in fasta or pir format. Unlike the block file output, these +fragments contain the complete sequence between the start and end points, including +any parts deleted in the alignment process. +Add appropriate commands to scanps_alias.dat, getpars and getcmd. + +Revision 1.6 1999/11/17 21:06:47 geoff +Add setup_caleb and other changes to swpp2 and so on. + +Revision 1.5 1997/06/27 07:17:32 gjb +Added rlic.c linfo.h and +changes to gjutil.c to give better support for +token manipulation + +Revision 1.4 1997/05/12 11:10:54 gjb +Re-added gjutil.c and gjutil.h to repository +after deleting them + +Revision 1.2 1997/05/12 10:47:52 gjb +Modified CVS header and log position + +*/ +#endif /* GJ_UTIL_H */ + diff --git a/binaries/src/iupred/iupred b/binaries/src/iupred/iupred index 42a76b0..e02c906 100644 Binary files a/binaries/src/iupred/iupred and b/binaries/src/iupred/iupred differ diff --git a/binaries/src/iupred/iupred.c b/binaries/src/iupred/iupred.c index d3b65ac..2d8b274 100644 --- a/binaries/src/iupred/iupred.c +++ b/binaries/src/iupred/iupred.c @@ -4,6 +4,9 @@ #include #include +#include +#include "getquery.h" +#include "gjutil.h" #define AA "GAVLIFPSTCMWYNQDEKRH" @@ -25,7 +28,7 @@ typedef struct { - char name[1000]; + char *name; int le; char *seq; double expscore; @@ -51,12 +54,14 @@ typedef struct void read_mat(char *path, char *fn, double **MAT, double *matave); int getargs(char *line,char *args[],int max); void read_ref(char *path, char *fn, double **REF); -void Get_Histo( P_STR *P, char *path, char *); +void Get_Histo( P_STR *PARAMS, char *path, char *); double **DMatrix(int n_rows, int n_cols); void *my_malloc(size_t size); -void IUPred(SEQ_STR *SEQ, P_STR *P); +void IUPred(SEQ_STR *SEQ, P_STR *PARAMS); void getRegions(SEQ_STR *SEQ ); void Get_Seq(char *fn, SEQ_STR *SEQ); +void createOutputFiles(int type); +void closeOutputFiles(int type); int LC, UC, WS; @@ -64,22 +69,23 @@ double Min_Ene; int JOIN, DEL; int Flag_EP; double EP; +FILE *shortout; +FILE *longout; +FILE *globout; int main(int argc, char **argv) { - P_STR *P; + P_STR *PARAMS; SEQ_STR *SEQ; int i,j; int type; char *path; - - - - if (argc!=3) { + + if (argc<2) { printf(" Usage: %s seqfile type \n",argv[0]); printf(" where type stands for one of the options of \n"); - printf(" \"long\", \"short\" or \"glob\"\n"); + printf(" \"long\", \"short\", \"glob\" or \"all\"\n"); exit(1); } if ((path=getenv("IUPred_PATH"))==NULL) { @@ -93,9 +99,11 @@ int main(int argc, char **argv) printf("# Z. Dosztanyi, V. Csizmok, P. Tompa and I. Simon\n"); printf("# J. Mol. Biol. (2005) 347, 827-839. \n"); printf("#\n"); + printf("# Modified to work within JABAWS (http://www.compbio.dundee.ac.uk/jabaws) framework by \n"); + printf("# Peter Troshin (pvtroshin@dundee.ac.uk) and Geoff Barton gjbarton@dundee.ac.uk\n"); + printf("# June, 2011\n"); + printf("#\n"); printf("#\n"); - - if ((strncmp(argv[2],"long",4))==0) { @@ -107,13 +115,45 @@ int main(int argc, char **argv) else if ((strncmp(argv[2],"glob",4))==0) { type=2; } + else if ((strncmp(argv[2],"all",3))==0) { + type=3; + } else { - printf("Wrong argument\n");exit(1); + printf("No disorder type is given assuming long\n"); + type=0; } - - - SEQ=malloc(sizeof(SEQ_STR)); - Get_Seq(argv[1],SEQ); + +/* Creating output files depending on the type */ + createOutputFiles(type); + + +/* Read input file sequence by sequence */ + FILE *fasta; + SEQS *fastaseq; + fasta = fopen(argv[1], "r"); + + do { + + fastaseq = gseq_fasta(fasta); + if(fastaseq==NULL) { + break; + } + SEQ=malloc(sizeof(SEQ_STR)); + + //printf("No: %s\n",fastaseq->id ); + //printf("L: %d\n",fastaseq->slen); + + SEQ->name=fastaseq->id; + SEQ->seq=fastaseq->seq; + SEQ->le=fastaseq->slen; + +#ifdef DEBUG + printf("N: %s\n",SEQ->name); + printf("S: %s\n",SEQ->seq); + printf("L: %d\n",SEQ->le); +#endif + + if (SEQ->le==0) {printf(" Sequence length 0\n");exit(1);} #ifdef DEBUG @@ -121,45 +161,47 @@ int main(int argc, char **argv) #endif - P=malloc(sizeof(P_STR)); - P->CC= DMatrix(AAN,AAN); - - if (type==0) { + PARAMS=malloc(sizeof(P_STR)); + PARAMS->CC= DMatrix(AAN,AAN); + /* LONG - DISORDER */ + if (type==0 || type==3) { LC=1; UC=100; WS=10; Flag_EP=0; - read_ref(path,"ss",P->CC); - Get_Histo(P, path, "histo"); + read_ref(path,"ss",PARAMS->CC); + Get_Histo(PARAMS, path, "histo"); + IUPred(SEQ,PARAMS); - IUPred(SEQ,P); + fprintf(longout, "# %s\n",SEQ->name); - printf("# Prediction output \n"); - printf("# %s\n",SEQ->name); for (i=0;ile;i++) - printf("%5d %c %10.4f\n",i+1,SEQ->seq[i],SEQ->en[i]); - } - if (type==1) { - LC=1; + fprintf(longout, "%5d %c %10.4f\n",i+1,SEQ->seq[i],SEQ->en[i]); + +} +/* SHORT - DISORDER */ + if (type==1 || type==3) { + LC=1; UC=25; WS=10; Flag_EP=1; EP=-1.26; - read_ref(path,"ss_casp",P->CC); - Get_Histo(P, path, "histo_casp"); + read_ref(path,"ss_casp",PARAMS->CC); + Get_Histo(PARAMS, path, "histo_casp"); - IUPred(SEQ,P); + IUPred(SEQ,PARAMS); - printf("# Prediction output \n"); - printf("# %s\n",SEQ->name); + fprintf(shortout, "# %s\n",SEQ->name); for (i=0;ile;i++) - printf("%5d %c %10.4f\n",i+1,SEQ->seq[i],SEQ->en[i]); - } - if (type==2) { + fprintf(shortout, "%5d %c %10.4f\n",i+1,SEQ->seq[i],SEQ->en[i]); +} + +/* GLOB - GLOBULAR DOMAINS */ + if (type==2 || type==3) { char *globseq; LC=1; @@ -167,12 +209,10 @@ int main(int argc, char **argv) WS=15; Flag_EP=0; + read_ref(path,"ss",PARAMS->CC); + Get_Histo(PARAMS,path,"histo"); - - read_ref(path,"ss",P->CC); - Get_Histo(P,path,"histo"); - - IUPred(SEQ,P); + IUPred(SEQ,PARAMS); Min_Ene=DMin_Ene; JOIN=DJOIN; @@ -182,42 +222,80 @@ int main(int argc, char **argv) globseq=malloc((SEQ->le+1)*sizeof(char)); for (i=0;ile;i++) globseq[i]=tolower(SEQ->seq[i]); - printf("# Prediction output \n"); - printf("# %s\n",SEQ->name); - printf("Number of globular domains: %5d \n",SEQ->ngr); + fprintf(globout,"# %s\n",SEQ->name); + fprintf(globout,"Number of globular domains: %5d \n",SEQ->ngr); for (i=0;ingr;i++) { - printf(" globular domain %5d. %d - %d \n", + fprintf(globout," globular domain %5d. %d - %d \n", i+1,SEQ->gr[i][0]+1,SEQ->gr[i][1]+1); for (j=SEQ->gr[i][0];jgr[i][1]+1;j++) { globseq[j]=toupper(globseq[j]); } } - printf(">%s\n",SEQ->name); + fprintf(globout,">%s\n",SEQ->name); for (i=0;ile;i++) { - if ((i>0)&&(i%60==0)) printf("\n"); - else if ((i>0)&&(i%10==0)) printf(" "); - printf("%c",globseq[i]); + if ((i>0)&&(i%60==0)) fprintf(globout,"\n"); + else if ((i>0)&&(i%10==0)) fprintf(globout," "); + fprintf(globout,"%c",globseq[i]); } - printf("\n"); + fprintf(globout,"\n"); free(globseq); + } - #ifdef DEBUG for (i=0;ile;i++) printf("%5d %c %10.4f\n",i,SEQ->seq[i],SEQ->en[i]); #endif - } - - free(SEQ->seq); - free(SEQ->eprof);free(SEQ->en);free(SEQ->smp); - free(SEQ); - + + + free(SEQ->name); + free(SEQ->seq); + free(SEQ); + free(fastaseq); + + } while(fastaseq != NULL); + + fclose(fasta); + closeOutputFiles(type); return 0; } -void IUPred(SEQ_STR *SEQ, P_STR *P) +void closeOutputFiles(int type) { + if (type==0) { + fclose(longout); + } + else if (type==1) { + fclose(shortout); + } + else if (type==2) { + fclose(globout); + } + else if (type==3) { + fclose(longout); + fclose(shortout); + fclose(globout); + } +} + +void createOutputFiles(int type) { + if (type==0) { + longout = fopen("out.long", "w"); + } + else if (type==1) { + shortout = fopen("out.short", "w"); + } + else if (type==2) { + globout = fopen("out.glob", "w"); + } + else if (type==3) { + longout = fopen("out.long", "w"); + shortout = fopen("out.short", "w"); + globout = fopen("out.glob", "w"); + } +} + +void IUPred(SEQ_STR *SEQ, P_STR *PARAMS) { int i,j, a1, a2, p; int naa; @@ -225,15 +303,15 @@ void IUPred(SEQ_STR *SEQ, P_STR *P) double min, max, step; naa=SEQ->le; - min=P->min; max=P->max;step=P->step; + min=PARAMS->min; max=PARAMS->max;step=PARAMS->step; SEQ->eprof=malloc(naa*sizeof(double)); for (i=0;ieprof[i]=0; SEQ->en=malloc(naa*sizeof(double)); for (i=0;ien[i]=0; SEQ->smp=malloc(naa*sizeof(double)); - for (i=0;ismp[i]=0; + for (i=0;ismp[i]=0; SEQ->expscore=0; for (i=0;iLC)&&((abs(i-j))seq[j]))))-AA; if ((a2<0) || (a2>=AAN)) continue; - SEQ->eprof[i]+=P->CC[a1][a2]; + SEQ->eprof[i]+=PARAMS->CC[a1][a2]; n2++; } SEQ->expscore+=SEQ->eprof[i]/(naa*n2); SEQ->eprof[i]/=n2; } - + if (Flag_EP==0) { for (i=0;ismp[i]>=max-2*step) SEQ->en[i]=0; if ((SEQ->smp[i]>min+2*step)&&(SEQ->smp[i]smp[i]-min)*(1.0/step)); - SEQ->en[i]=P->distro[p]; + SEQ->en[i]=PARAMS->distro[p]; } #ifdef DEBUG @@ -287,7 +365,6 @@ void IUPred(SEQ_STR *SEQ, P_STR *P) i,SEQ->eprof[i], SEQ->smp[i],SEQ->en[i]); #endif - } } @@ -388,7 +465,7 @@ void getRegions(SEQ_STR *SEQ ) } -void Get_Histo(P_STR *P, char *path, char *fn) +void Get_Histo(P_STR *PARAMS, char *path, char *fn) { FILE *f; char ln[ML]; @@ -410,8 +487,8 @@ void Get_Histo(P_STR *P, char *path, char *fn) fscanf(f,"%*s %lf %lf %d\n",&min, &max, &nb); - P->distro=malloc(nb*sizeof(double )); - for (i=0;idistro[i]=0; + PARAMS->distro=malloc(nb*sizeof(double )); + for (i=0;idistro[i]=0; for (i=0,set=0;idistro[i]=v; + PARAMS->distro[i]=v; } fclose(f); - P->max=max; - P->min=min; - P->nb=nb; - P->cutoff=cutoff; + PARAMS->max=max; + PARAMS->min=min; + PARAMS->nb=nb; + PARAMS->cutoff=cutoff; - - P->step=(max-min)/nb; - P->cutoff-=P->step; + PARAMS->step=(max-min)/nb; + PARAMS->cutoff-=PARAMS->step; } @@ -594,6 +670,7 @@ double **DMatrix(int n_rows, int n_cols) } +/* void Get_Seq(char *fn, SEQ_STR *SEQ) { char line[ML]; @@ -632,8 +709,7 @@ void Get_Seq(char *fn, SEQ_STR *SEQ) #ifdef DEBUG printf("%s %5d\n%s\n",SEQ->name,SEQ->le,SEQ->seq); #endif - } - +*/