X-Git-Url: http://source.jalview.org/gitweb/?a=blobdiff_plain;f=sources%2Fmulticoil%2Fscio.c;fp=sources%2Fmulticoil%2Fscio.c;h=be9fa5ba075444036e0fe364efba622078b8e0a1;hb=a5e6297d655a784603d499da5a025d5d5fa78783;hp=0000000000000000000000000000000000000000;hpb=df24dcd3c415c000592af419f2c9304a4e05c2ee;p=jpred.git diff --git a/sources/multicoil/scio.c b/sources/multicoil/scio.c new file mode 100644 index 0000000..be9fa5b --- /dev/null +++ b/sources/multicoil/scio.c @@ -0,0 +1,646 @@ +/* Bonnie Berger, David Wilson and Theodore Tonchev 1992 */ +/* Modified by Ethan Wolf 1995 */ +/* C Code File */ + +#include +#include +#include +#include "sc.h" +#include "scio.h" +#include "scsystem.h" +#include "scconst.h" +#include +#include +#include + +#include /* For finding location of a ~usr directory. */ + +/* Amino Acid IDs */ +/* char aaname[] = "LIVMFYGAKRHEDQNSTCWPX"; */ +char aaname[]= "LIVMFYGAKRHEDQNSTCWPXBZ."; + +/* Position IDs */ +char posname[] = "abcdefg"; + +/* Array Sizes */ +#define SIZEOFGTS 1 +#define SIZEOFGFS AANUM +#define SIZEOFGTP POSNUM +#define SIZEOFGFP (AANUM*AANUM*POSNUM) + +#define SIZEOFPTS POSNUM +#define SIZEOFPFS (AANUM*POSNUM) +#define SIZEOFPTP (POSNUM*POSNUM) +#define SIZEOFPFP (AANUM*AANUM*POSNUM*POSNUM) + +/* Magic Numbers */ +#define GENFREQ_MAGIC "GenFreq 1 + 20 + 7 + 20x20x7 long\n" +#define POSFREQ_MAGIC "PosFreq 7 + 20x7 + 7x7 + 20x20x7x7 long\n" + +#ifndef ERROR +#define ERROR(STR) error(FOPEN_ERR, (STR)) +#endif +#define NULLERR(X) if (!(X)) error(FOPEN_ERR, name) +static fd_set _popened_files; +static fd_set *popened_files=NULL; + +void open_file (FILE** fl, char* name, char* ops) +{ + int str_len; + char cmd[256]; + + /* Initialize set of popened files */ + if (!popened_files) { + popened_files = &_popened_files; + FD_ZERO(popened_files); + } + /* If we aren't reading, do the usual thing */ + if (strcmp(ops,"r")) { + NULLERR(*fl = fopen(name, ops)); + return; } + + /* If we're reading a .Z file, use zcat */ + str_len = strlen(name); + if (str_len>256-8) + error(FOPEN_ERR,"file name too long for internal buffer"); + strcpy(cmd,"zcat "); + strcat(cmd,name); + if (name[str_len-2]=='.' && name[str_len-1]=='Z') { + NULLERR(*fl = popen(cmd, "r")); + FD_SET(fileno(*fl),popened_files); + return; } + + /* Otherwise, if the file exists, read it normally */ + if (!access(name,F_OK)) { + NULLERR(*fl = fopen(name, ops)); + return; } + + /* And if the file doesn't exist, try adding .Z */ + strcat(cmd,".Z"); + if (!access(&(cmd[5]),F_OK)) { + NULLERR(*fl = popen(cmd, "r")); + FD_SET(fileno(*fl),popened_files); + return; } + + /* The file does not exist. */ + error(FOPEN_ERR,name); +} +void close_file (FILE *fl) +{ + if (FD_ISSET(fileno(fl),popened_files)) { + FD_CLR(fileno(fl),popened_files); + pclose(fl); + } + else + fclose(fl); +} + +void htonla(); +void ntohla(); + + + +void writegenfreq(FILE* fl, + long totals[], long freqs[AANUM], + long totalp[POSNUM], long freqp[AANUM][AANUM][POSNUM], + FILE* flog) +{ + int i; + + fputs("Writing Gen Frequences.\n", flog); + + fputs(GENFREQ_MAGIC, fl); + + htonla(totals, SIZEOFGTS); + if (fwrite(totals, sizeof(long), SIZEOFGTS, fl) != SIZEOFGTS) + error(FWRITE_ERR, " file"); + + htonla(freqs, SIZEOFGFS); + if (fwrite(freqs, sizeof(long), SIZEOFGFS, fl) != SIZEOFGFS) + error(FWRITE_ERR, " file"); + + htonla(totalp, SIZEOFGTP); + if (fwrite(totalp, sizeof(long), SIZEOFGTP, fl) != SIZEOFGTP) + error(FWRITE_ERR, " file"); + + htonla(freqp, SIZEOFGFP); + if (fwrite(freqp, sizeof(long), SIZEOFGFP, fl) != SIZEOFGFP) + error(FWRITE_ERR, " file"); +} + +void readgenfreq(FILE* fl, + long totals[], long freqs[AANUM], + long totalp[POSNUM], long freqp[AANUM*AANUM*POSNUM], + FILE* flog) +{ + char line[MAXLINE]; + int i; + + fputs("Reading Gen Frequences.\n", flog); + + if (strlen(GENFREQ_MAGIC) > 0) { + fgets(line, MAXLINE, fl); + if (strcmp(line, GENFREQ_MAGIC)) + error(MAGIC_ERR, " file"); + } + +/* + if (fread(totals, sizeof(long), SIZEOFGTS, fl) != SIZEOFGTS) + error(FREAD_ERR, " file"); + ntohla(totals, SIZEOFGTS); + + if (fread(freqs, sizeof(long), SIZEOFGFS, fl) != SIZEOFGFS) + error(FREAD_ERR, " file"); + ntohla(freqs, SIZEOFGFS); + + if (fread(totalp, sizeof(long), SIZEOFGTP, fl) != SIZEOFGTP) + error(FREAD_ERR, " file"); + ntohla(totalp, SIZEOFGTP); + + if (fread(freqp, sizeof(long), SIZEOFGFP, fl) != SIZEOFGFP) + error(FREAD_ERR, " file"); + ntohla(freqp, SIZEOFGFP); +*/ + + for (i = 0; i < SIZEOFGTS; i++) { + fscanf(fl, "%ld", &totals[i]); + } + for (i = 0; i < SIZEOFGFS; i++) { + fscanf(fl, "%ld", &freqs[i]); + } + for (i = 0; i < SIZEOFGTP; i++) { + fscanf(fl, "%ld", &totalp[i]); + } + for (i = 0; i < SIZEOFGFP; i++) { + fscanf(fl, "%ld", &freqp[i]); + } + +/* + printf("gtotals\n"); + for (i = 0; i < SIZEOFGTS; i++) { + printf("%ld\n", totals[i]); + } + printf("gfreqs\n"); + for (i = 0; i < SIZEOFGFS; i++) { + printf("%ld\n", freqs[i]); + } + printf("gtotalp\n"); + for (i = 0; i < SIZEOFGTP; i++) { + printf("%ld\n", totalp[i]); + } + printf("gfreqp\n"); + for (i = 0; i < SIZEOFGFP; i++) { + printf("%ld\n", freqp[i]); + } +*/ +} + + +void writeposfreq(FILE* fl, + long totals[POSNUM], long freqs[AANUM][POSNUM], + long totalp[POSNUM][POSNUM], + long freqp[AANUM][AANUM][POSNUM][POSNUM], + FILE* flog) +{ + int i,j; + + fputs("Writing Position Frequences.\n", flog); + + fputs(POSFREQ_MAGIC, fl); + + htonla(totals, SIZEOFPTS); + if (fwrite(totals, sizeof(long), SIZEOFPTS, fl) != SIZEOFPTS) + error(FWRITE_ERR, " file"); + + htonla(freqs, SIZEOFPFS); + if (fwrite(freqs, sizeof(long), SIZEOFPFS, fl) != SIZEOFPFS) + error(FWRITE_ERR, " file"); + + htonla(totalp, SIZEOFPTP); + if (fwrite(totalp, sizeof(long), SIZEOFPTP, fl) != SIZEOFPTP) + error(FWRITE_ERR, " file"); + + htonla(freqp, SIZEOFPFP); + if (fwrite(freqp, sizeof(long), SIZEOFPFP, fl) != SIZEOFPFP) + error(FWRITE_ERR, " file"); +} + +void readposfreq(FILE* fl, + long totals[POSNUM], long freqs[AANUM*POSNUM], + long totalp[POSNUM*POSNUM], + long freqp[AANUM*AANUM*POSNUM*POSNUM], + FILE* flog) +{ + char line[MAXLINE]; + int i; + + fprintf(flog, "Reading Position Frequences.\n"); + + if (strlen(POSFREQ_MAGIC) > 0) { + fgets(line, MAXLINE, fl); + if (strcmp(line, POSFREQ_MAGIC)) + error(MAGIC_ERR, " file"); + } +/* + if (fread(totals, sizeof(long), SIZEOFPTS, fl) != SIZEOFPTS) + error(FREAD_ERR, " file"); + ntohla(totals, SIZEOFPTS); + + if (fread(freqs, sizeof(long), SIZEOFPFS, fl) != SIZEOFPFS) + error(FREAD_ERR, " file"); + ntohla(freqs, SIZEOFPFS); + + if (fread(totalp, sizeof(long), SIZEOFPTP, fl) != SIZEOFPTP) + error(FREAD_ERR, " file"); + ntohla(totalp, SIZEOFPTP); + + if (fread(freqp, sizeof(long), SIZEOFPFP, fl) != SIZEOFPFP) + error(FREAD_ERR, " file"); + ntohla(freqp, SIZEOFPFP); +*/ + + for (i = 0; i < SIZEOFPTS; i++) { + fscanf(fl, "%ld", &totals[i]); + } + for (i = 0; i < SIZEOFPFS; i++) { + fscanf(fl, "%ld", &freqs[i]); + } + for (i = 0; i < SIZEOFPTP; i++) { + fscanf(fl, "%ld", &totalp[i]); + } + for (i = 0; i < SIZEOFPFP; i++) { + fscanf(fl, "%ld", &freqp[i]); + } + + +/* + for (i = 0; i < SIZEOFPTS; i++) { + printf("%d\n", totals[i]); + } + for (i = 0; i < SIZEOFPFS; i++) { + printf("%d\n", freqs[i]); + } + for (i = 0; i < SIZEOFPTP; i++) { + printf("%d\n", totalp[i]); + } + for (i = 0; i < SIZEOFPFP; i++) { + printf("%d\n", freqp[i]); + } +*/ +} + +void htonla(long arr[], int size) +{ + int i; + + for (i = 0; i < size; i++) + arr[i] = htonl(arr[i]); +} + +void ntohla(long arr[], int size) +{ + int i; + + for (i = 0; i < size; i++) + arr[i] = ntohl(arr[i]); +} + + +int getseq(FILE* fl, char seq[MAXSEQLEN], int* offset, int* seqlen, + char *title) +{ + char line[MAXLINE]; + char ch; + int i; + + if (title) *title = 0; + *seqlen = 0; + *offset = POSNUM; + while (1) { + if (! (fgets (line, MAXLINE, fl)) ) + return 0; + if (!strncmp("OFFSET", line, 6) || line[0] == '>' || + !strncmp(" ..", line+strlen(line)-5, 4)) { + if (line[0] == '>') + if (title) { + fgets (title, MAXLINE, fl); /* skips label */ + title[strlen(title)-1] = 0; + } + else + fgets (line, MAXLINE, fl); /* skips label */ + if (!strncmp("OFFSET", line, 6)) + if (line[6] && line[7] >= '0' && line[7] <= '6') + *offset = line[7] - '0'; + else *offset = DEFOFFSET; + while (1) { + if (! (fgets (line, MAXLINE, fl)) ) + return 1; + for (i=0; line[i]; i++) + if ((ch = aanum(line[i])) != -1) + seq[(*seqlen)++] = ch; + else if (line[i] == '*' || + (line[i] == '/' && line[i+1] == '/')) + return 1; + } + } + } +} + + +int getseq2 (FILE* fl, Sequence *sequence) +{ + char line[MAXLINE]; + char *res = sequence->seq, *tmp; + int i, j, cc; + int posr=0; + + sequence->title[0] = 0; + while (fgets(line, MAXLINE, fl)) { + if (line[0] == '>') { + /* Copy the line - ">" into the code */ + strcpy(sequence->code,&(line[1])); + + /* We make tmp point to the first space, \t or \n in the line. We then */ + /* change that space, \t or \n to a 0, and make tmp point to the next */ + /* non space, \t or \n character. If there is none (i.e. the line had */ + /* ended so there are no more characters), then tmp[0]=0 still. In this*/ + /* case, the next line holds the title. Otherwise, the next series of */ + /* characters in the line are the title. */ + + tmp = &(sequence->code[strcspn(sequence->code," \t\n")]); + tmp[0]=0; + tmp = &(tmp[strspn(&tmp[1]," \t\n") + 1]); + + if (tmp[0]) /* Read more characters after the code, so probably FASTA. */ + strcpy(sequence->title,tmp); + else + fgets (sequence->title, MAXLINE, fl); + sequence->title[strlen(sequence->title)-1] = 0; /* Remove the newline.*/ + + + while (1) { + cc=fgetc(fl); /** We want to check the first character of the */ + /** next line without reading in the whole line. */ + ungetc(cc,fl); /** Put the character back onto the next line. */ + + if ((cc != EOF) && (cc != '>')) { + /** If we are not starting the next sequence: */ + fgets(line, MAXLINE, fl); + for (i=0; line[i] && ( (res-sequence->seq) < MAXSEQLEN); i++) + switch (toupper(line[i])) { + case 'L': *(res++) = Leucine; break; + case 'I': *(res++) = Isoleucine; break; + case 'V': *(res++) = Valine; break; + case 'M': *(res++) = Methionine; break; + case 'F': *(res++) = Phenylalanine; break; + case 'Y': *(res++) = Tyrosine; break; + case 'G': *(res++) = Glycine; break; + case 'A': *(res++) = Alanine; break; + case 'K': *(res++) = Lysine; break; + case 'R': *(res++) = Arginine; break; + case 'H': *(res++) = Histidine; break; + case 'E': *(res++) = GlutamicAcid; break; + case 'D': *(res++) = AsparticAcid; break; + case 'Q': *(res++) = Glutamine; break; + case 'N': *(res++) = Asparagine; break; + case 'S': *(res++) = Serine; break; + case 'T': *(res++) = Threonine; break; + case 'C': *(res++) = Cysteine; break; + case 'W': *(res++) = Tryptophan; break; + case 'P': *(res++) = Proline; break; + case 'X': *(res++) = AnyRes; break; + case 'B': *(res++) = Asparagix; break; + case 'Z': *(res++) = Glutamix; break; +/* fprintf(stderr,"Warning, not dealing with B or Z.\n"); + *(res++) = AnyRes; break; +*/ + case '(': + case ')': + case '=': + case '/': + case '.': + case ',': + /* fprintf(stderr,"Warning, not dealing with punctuation.\n"); */ + break; + + + case '*': + sequence->seqlen = (res-sequence->seq); + + /* Get the register info if any */ + while (1) { + switch (cc=getc(fl)) { + case '>': /* Next sequence */ + ungetc(cc,fl); + if (posr==0) {sequence->reg = NULL; return 1;} + if (posr==sequence->seqlen) return 1; + fprintf(stderr, "Erroneous .pos file!\n"); exit(-1); + break; + case EOF: + if (posr==0) {sequence->reg = NULL; return 1;} + if (posr==sequence->seqlen) return 1; + fprintf(stderr, "Erroneous .pos file!\n"); exit(-1); + break; + default: + ungetc(cc,fl); + } + + fgets(line, MAXLINE, fl); + for (j=0; line[j]; j++) { + switch (cc = line[j]) { + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + sequence->reg[posr++] = cc-'a'; + break; + case '.': + sequence->reg[posr++] = '.'; + /* not 'genbank', but unspecified */ + break; + case '*': + if (posr==sequence->seqlen) return 1; + if (posr ==0) {sequence->reg = NULL; return 1;} + /* A 2nd * indicates not pos. */ + fprintf(stderr, "Erroneous .pos file!\n"); exit(-1); + case ' ': + case '\n': + case '\t': + break; + default: + fprintf(stderr,"Unable to parse input.\n"); + exit(-1); + } + } + } + return 1; /*** A '*' signals the end of the input seq for */ + /*** non-FASTA format. */ + case ' ': + case '\n': + case '\t': + break; + default: + printf("Unable to parse input %c.\n", line[i]); + exit(-1); + } /* Ends the switch and the for loop */ + + if ( (res-sequence->seq)==MAXSEQLEN) { /* couldn't read whole seq. */ + sequence->seqlen=MAXSEQLEN; + fprintf(stderr,"\nSequence %s was to long to read all of.", + sequence->code); + } + } + else { + sequence->seqlen = (res-sequence->seq); + return 1; /* If we read a 2nd > or and EOF then we have finished */ + /* reading the seq. */ + } + } + printf("Unexpected end of input.\n"); + exit(-1); + } + } + return 0; +} + +char aanum(char ch) +{ + int i; + + ch = toupper(ch); + for (i = 0; aaname[i]; i++) + if (ch == aaname[i]) + return i; + return -1; +} + + + +char numaa(char ch) +{ + if (ch >= 0 && ch < strlen(aaname)) + return aaname[ch]; + else + return 0; +} + +char posnum(char ch) +{ + int i; + for (i = 0; posname[i]; i++) + if (ch == posname[i]) + return i; + return -1; +} + +char numpos(char ch) +{ + if (ch >= 0 && ch < strlen(posname)) + return posname[ch]; + else + return 0; +} + +FILE *sopen (char* name, char* ops) +{ + FILE *fl; + int str_len; + char cmd[256]; + char temp_name[500]; + char usr_name[50]; + int i; + + /* Initialize set of popened files */ + if (!popened_files) { + popened_files = &_popened_files; + FD_ZERO(popened_files); + } + /* Is name valid? */ + if (!name) return NULL; + + /* Maybe we user wants stdin or stdout */ + if (!strcmp(name,"-")) { + if (!strcmp(ops,"r")) + return stdin; + if (!strcmp(ops,"w")) + return stdout; + ERROR(name); + } + /* If writing and filename starts with "|", use popen */ + if (!strcmp(ops,"w") && name[0]=='|') { + NULLERR(fl = popen(&(name[1]), ops)); + FD_SET(fileno(fl),popened_files); + return fl; } + + if (name[0]=='~') + if (name[1]=='/') { /* Manually find the home dir. */ + strcpy(temp_name,getenv("HOME")); /* Get home directory. */ + strcat(temp_name, &name[1]); /* Put it in place of ~ */ + name= temp_name; + } + else { /* Convert ~usr into /location/usr */ + strncpy(usr_name, &name[1], strcspn(&name[1],"/")); + /* Figures out length of usr name (between ~ and /) */ + /* and copies the usrname into usr_name. Then we need */ + /* to add end of string character. */ + usr_name[strcspn(&name[1],"/")] = 0; /* End of string. */ + + strcpy(temp_name, getpwnam(usr_name)->pw_dir); + /* copies name of usr login dir to temp_name */ + + strcat(temp_name, &name[strcspn(name,"/")]); /* Replaces ~ with */ + /* /location */ + name= temp_name; + } + + + + /* If we aren't reading, do the usual thing */ + if (strcmp(ops,"r")) { + NULLERR(fl = fopen(name, ops)); + return fl; } + + /* If we're reading a .Z file, use zcat */ + str_len = strlen(name); + if (str_len>256-16) + ERROR("file name too long for internal buffer"); + + if (name[str_len-2]=='.' && name[str_len-1]=='Z') { + strcpy(cmd,"zcat "); + strcat(cmd,name); + NULLERR(fl = popen(cmd, "r")); + FD_SET(fileno(fl),popened_files); + return fl; } + + /* Otherwise, if the file exists, read it normally */ + if (!access(name,F_OK)) { + NULLERR(fl = fopen(name, ops)); + return fl; } + + /* And if the file doesn't exist, try adding .Z */ + strcpy(cmd,"zcat "); + strcat(cmd,name); + strcat(cmd,".Z"); + if (!access(&(cmd[5]),F_OK)) { + NULLERR(fl = popen(cmd, "r")); + FD_SET(fileno(fl),popened_files); + return fl; } + + /* The file does not exist. */ + ERROR(name); +} + +void sclose (FILE *fl) +{ + if (FD_ISSET(fileno(fl),popened_files)) { + FD_CLR(fileno(fl),popened_files); + pclose(fl); + } + else + fclose(fl); +} + +/* End of Code */