1 /* Bonnie Berger, David Wilson and Theodore Tonchev 1992 */
2 /* Modified by Ethan Wolf 1995 */
12 #include <sys/types.h>
13 #include <netinet/in.h>
16 #include <pwd.h> /* For finding location of a ~usr directory. */
19 /* char aaname[] = "LIVMFYGAKRHEDQNSTCWPX"; */
20 char aaname[]= "LIVMFYGAKRHEDQNSTCWPXBZ.";
23 char posname[] = "abcdefg";
27 #define SIZEOFGFS AANUM
28 #define SIZEOFGTP POSNUM
29 #define SIZEOFGFP (AANUM*AANUM*POSNUM)
31 #define SIZEOFPTS POSNUM
32 #define SIZEOFPFS (AANUM*POSNUM)
33 #define SIZEOFPTP (POSNUM*POSNUM)
34 #define SIZEOFPFP (AANUM*AANUM*POSNUM*POSNUM)
37 #define GENFREQ_MAGIC "GenFreq 1 + 20 + 7 + 20x20x7 long\n"
38 #define POSFREQ_MAGIC "PosFreq 7 + 20x7 + 7x7 + 20x20x7x7 long\n"
41 #define ERROR(STR) error(FOPEN_ERR, (STR))
43 #define NULLERR(X) if (!(X)) error(FOPEN_ERR, name)
44 static fd_set _popened_files;
45 static fd_set *popened_files=NULL;
47 void open_file (FILE** fl, char* name, char* ops)
52 /* Initialize set of popened files */
54 popened_files = &_popened_files;
55 FD_ZERO(popened_files);
57 /* If we aren't reading, do the usual thing */
58 if (strcmp(ops,"r")) {
59 NULLERR(*fl = fopen(name, ops));
62 /* If we're reading a .Z file, use zcat */
63 str_len = strlen(name);
65 error(FOPEN_ERR,"file name too long for internal buffer");
68 if (name[str_len-2]=='.' && name[str_len-1]=='Z') {
69 NULLERR(*fl = popen(cmd, "r"));
70 FD_SET(fileno(*fl),popened_files);
73 /* Otherwise, if the file exists, read it normally */
74 if (!access(name,F_OK)) {
75 NULLERR(*fl = fopen(name, ops));
78 /* And if the file doesn't exist, try adding .Z */
80 if (!access(&(cmd[5]),F_OK)) {
81 NULLERR(*fl = popen(cmd, "r"));
82 FD_SET(fileno(*fl),popened_files);
85 /* The file does not exist. */
86 error(FOPEN_ERR,name);
88 void close_file (FILE *fl)
90 if (FD_ISSET(fileno(fl),popened_files)) {
91 FD_CLR(fileno(fl),popened_files);
103 void writegenfreq(FILE* fl,
104 long totals[], long freqs[AANUM],
105 long totalp[POSNUM], long freqp[AANUM][AANUM][POSNUM],
110 fputs("Writing Gen Frequences.\n", flog);
112 fputs(GENFREQ_MAGIC, fl);
114 htonla(totals, SIZEOFGTS);
115 if (fwrite(totals, sizeof(long), SIZEOFGTS, fl) != SIZEOFGTS)
116 error(FWRITE_ERR, "<GenFreq> file");
118 htonla(freqs, SIZEOFGFS);
119 if (fwrite(freqs, sizeof(long), SIZEOFGFS, fl) != SIZEOFGFS)
120 error(FWRITE_ERR, "<GenFreq> file");
122 htonla(totalp, SIZEOFGTP);
123 if (fwrite(totalp, sizeof(long), SIZEOFGTP, fl) != SIZEOFGTP)
124 error(FWRITE_ERR, "<GenFreq> file");
126 htonla(freqp, SIZEOFGFP);
127 if (fwrite(freqp, sizeof(long), SIZEOFGFP, fl) != SIZEOFGFP)
128 error(FWRITE_ERR, "<GenFreq> file");
131 void readgenfreq(FILE* fl,
132 long totals[], long freqs[AANUM],
133 long totalp[POSNUM], long freqp[AANUM*AANUM*POSNUM],
139 fputs("Reading Gen Frequences.\n", flog);
141 if (strlen(GENFREQ_MAGIC) > 0) {
142 fgets(line, MAXLINE, fl);
143 if (strcmp(line, GENFREQ_MAGIC))
144 error(MAGIC_ERR, "<GenFreq> file");
148 if (fread(totals, sizeof(long), SIZEOFGTS, fl) != SIZEOFGTS)
149 error(FREAD_ERR, "<GenFreq> file");
150 ntohla(totals, SIZEOFGTS);
152 if (fread(freqs, sizeof(long), SIZEOFGFS, fl) != SIZEOFGFS)
153 error(FREAD_ERR, "<GenFreq> file");
154 ntohla(freqs, SIZEOFGFS);
156 if (fread(totalp, sizeof(long), SIZEOFGTP, fl) != SIZEOFGTP)
157 error(FREAD_ERR, "<GenFreq> file");
158 ntohla(totalp, SIZEOFGTP);
160 if (fread(freqp, sizeof(long), SIZEOFGFP, fl) != SIZEOFGFP)
161 error(FREAD_ERR, "<GenFreq> file");
162 ntohla(freqp, SIZEOFGFP);
165 for (i = 0; i < SIZEOFGTS; i++) {
166 fscanf(fl, "%ld", &totals[i]);
168 for (i = 0; i < SIZEOFGFS; i++) {
169 fscanf(fl, "%ld", &freqs[i]);
171 for (i = 0; i < SIZEOFGTP; i++) {
172 fscanf(fl, "%ld", &totalp[i]);
174 for (i = 0; i < SIZEOFGFP; i++) {
175 fscanf(fl, "%ld", &freqp[i]);
180 for (i = 0; i < SIZEOFGTS; i++) {
181 printf("%ld\n", totals[i]);
184 for (i = 0; i < SIZEOFGFS; i++) {
185 printf("%ld\n", freqs[i]);
188 for (i = 0; i < SIZEOFGTP; i++) {
189 printf("%ld\n", totalp[i]);
192 for (i = 0; i < SIZEOFGFP; i++) {
193 printf("%ld\n", freqp[i]);
199 void writeposfreq(FILE* fl,
200 long totals[POSNUM], long freqs[AANUM][POSNUM],
201 long totalp[POSNUM][POSNUM],
202 long freqp[AANUM][AANUM][POSNUM][POSNUM],
207 fputs("Writing Position Frequences.\n", flog);
209 fputs(POSFREQ_MAGIC, fl);
211 htonla(totals, SIZEOFPTS);
212 if (fwrite(totals, sizeof(long), SIZEOFPTS, fl) != SIZEOFPTS)
213 error(FWRITE_ERR, "<PosFreq> file");
215 htonla(freqs, SIZEOFPFS);
216 if (fwrite(freqs, sizeof(long), SIZEOFPFS, fl) != SIZEOFPFS)
217 error(FWRITE_ERR, "<PosFreq> file");
219 htonla(totalp, SIZEOFPTP);
220 if (fwrite(totalp, sizeof(long), SIZEOFPTP, fl) != SIZEOFPTP)
221 error(FWRITE_ERR, "<PosFreq> file");
223 htonla(freqp, SIZEOFPFP);
224 if (fwrite(freqp, sizeof(long), SIZEOFPFP, fl) != SIZEOFPFP)
225 error(FWRITE_ERR, "<PosFreq> file");
228 void readposfreq(FILE* fl,
229 long totals[POSNUM], long freqs[AANUM*POSNUM],
230 long totalp[POSNUM*POSNUM],
231 long freqp[AANUM*AANUM*POSNUM*POSNUM],
237 fprintf(flog, "Reading Position Frequences.\n");
239 if (strlen(POSFREQ_MAGIC) > 0) {
240 fgets(line, MAXLINE, fl);
241 if (strcmp(line, POSFREQ_MAGIC))
242 error(MAGIC_ERR, "<PosFreq> file");
245 if (fread(totals, sizeof(long), SIZEOFPTS, fl) != SIZEOFPTS)
246 error(FREAD_ERR, "<PosFreq> file");
247 ntohla(totals, SIZEOFPTS);
249 if (fread(freqs, sizeof(long), SIZEOFPFS, fl) != SIZEOFPFS)
250 error(FREAD_ERR, "<PosFreq> file");
251 ntohla(freqs, SIZEOFPFS);
253 if (fread(totalp, sizeof(long), SIZEOFPTP, fl) != SIZEOFPTP)
254 error(FREAD_ERR, "<PosFreq> file");
255 ntohla(totalp, SIZEOFPTP);
257 if (fread(freqp, sizeof(long), SIZEOFPFP, fl) != SIZEOFPFP)
258 error(FREAD_ERR, "<PosFreq> file");
259 ntohla(freqp, SIZEOFPFP);
262 for (i = 0; i < SIZEOFPTS; i++) {
263 fscanf(fl, "%ld", &totals[i]);
265 for (i = 0; i < SIZEOFPFS; i++) {
266 fscanf(fl, "%ld", &freqs[i]);
268 for (i = 0; i < SIZEOFPTP; i++) {
269 fscanf(fl, "%ld", &totalp[i]);
271 for (i = 0; i < SIZEOFPFP; i++) {
272 fscanf(fl, "%ld", &freqp[i]);
277 for (i = 0; i < SIZEOFPTS; i++) {
278 printf("%d\n", totals[i]);
280 for (i = 0; i < SIZEOFPFS; i++) {
281 printf("%d\n", freqs[i]);
283 for (i = 0; i < SIZEOFPTP; i++) {
284 printf("%d\n", totalp[i]);
286 for (i = 0; i < SIZEOFPFP; i++) {
287 printf("%d\n", freqp[i]);
292 void htonla(long arr[], int size)
296 for (i = 0; i < size; i++)
297 arr[i] = htonl(arr[i]);
300 void ntohla(long arr[], int size)
304 for (i = 0; i < size; i++)
305 arr[i] = ntohl(arr[i]);
309 int getseq(FILE* fl, char seq[MAXSEQLEN], int* offset, int* seqlen,
316 if (title) *title = 0;
320 if (! (fgets (line, MAXLINE, fl)) )
322 if (!strncmp("OFFSET", line, 6) || line[0] == '>' ||
323 !strncmp(" ..", line+strlen(line)-5, 4)) {
326 fgets (title, MAXLINE, fl); /* skips label */
327 title[strlen(title)-1] = 0;
330 fgets (line, MAXLINE, fl); /* skips label */
331 if (!strncmp("OFFSET", line, 6))
332 if (line[6] && line[7] >= '0' && line[7] <= '6')
333 *offset = line[7] - '0';
334 else *offset = DEFOFFSET;
336 if (! (fgets (line, MAXLINE, fl)) )
338 for (i=0; line[i]; i++)
339 if ((ch = aanum(line[i])) != -1)
340 seq[(*seqlen)++] = ch;
341 else if (line[i] == '*' ||
342 (line[i] == '/' && line[i+1] == '/'))
350 int getseq2 (FILE* fl, Sequence *sequence)
353 char *res = sequence->seq, *tmp;
357 sequence->title[0] = 0;
358 while (fgets(line, MAXLINE, fl)) {
359 if (line[0] == '>') {
360 /* Copy the line - ">" into the code */
361 strcpy(sequence->code,&(line[1]));
363 /* We make tmp point to the first space, \t or \n in the line. We then */
364 /* change that space, \t or \n to a 0, and make tmp point to the next */
365 /* non space, \t or \n character. If there is none (i.e. the line had */
366 /* ended so there are no more characters), then tmp[0]=0 still. In this*/
367 /* case, the next line holds the title. Otherwise, the next series of */
368 /* characters in the line are the title. */
370 tmp = &(sequence->code[strcspn(sequence->code," \t\n")]);
372 tmp = &(tmp[strspn(&tmp[1]," \t\n") + 1]);
374 if (tmp[0]) /* Read more characters after the code, so probably FASTA. */
375 strcpy(sequence->title,tmp);
377 fgets (sequence->title, MAXLINE, fl);
378 sequence->title[strlen(sequence->title)-1] = 0; /* Remove the newline.*/
382 cc=fgetc(fl); /** We want to check the first character of the */
383 /** next line without reading in the whole line. */
384 ungetc(cc,fl); /** Put the character back onto the next line. */
386 if ((cc != EOF) && (cc != '>')) {
387 /** If we are not starting the next sequence: */
388 fgets(line, MAXLINE, fl);
389 for (i=0; line[i] && ( (res-sequence->seq) < MAXSEQLEN); i++)
390 switch (toupper(line[i])) {
391 case 'L': *(res++) = Leucine; break;
392 case 'I': *(res++) = Isoleucine; break;
393 case 'V': *(res++) = Valine; break;
394 case 'M': *(res++) = Methionine; break;
395 case 'F': *(res++) = Phenylalanine; break;
396 case 'Y': *(res++) = Tyrosine; break;
397 case 'G': *(res++) = Glycine; break;
398 case 'A': *(res++) = Alanine; break;
399 case 'K': *(res++) = Lysine; break;
400 case 'R': *(res++) = Arginine; break;
401 case 'H': *(res++) = Histidine; break;
402 case 'E': *(res++) = GlutamicAcid; break;
403 case 'D': *(res++) = AsparticAcid; break;
404 case 'Q': *(res++) = Glutamine; break;
405 case 'N': *(res++) = Asparagine; break;
406 case 'S': *(res++) = Serine; break;
407 case 'T': *(res++) = Threonine; break;
408 case 'C': *(res++) = Cysteine; break;
409 case 'W': *(res++) = Tryptophan; break;
410 case 'P': *(res++) = Proline; break;
411 case 'X': *(res++) = AnyRes; break;
412 case 'B': *(res++) = Asparagix; break;
413 case 'Z': *(res++) = Glutamix; break;
414 /* fprintf(stderr,"Warning, not dealing with B or Z.\n");
415 *(res++) = AnyRes; break;
423 /* fprintf(stderr,"Warning, not dealing with punctuation.\n"); */
428 sequence->seqlen = (res-sequence->seq);
430 /* Get the register info if any */
432 switch (cc=getc(fl)) {
433 case '>': /* Next sequence */
435 if (posr==0) {sequence->reg = NULL; return 1;}
436 if (posr==sequence->seqlen) return 1;
437 fprintf(stderr, "Erroneous .pos file!\n"); exit(-1);
440 if (posr==0) {sequence->reg = NULL; return 1;}
441 if (posr==sequence->seqlen) return 1;
442 fprintf(stderr, "Erroneous .pos file!\n"); exit(-1);
448 fgets(line, MAXLINE, fl);
449 for (j=0; line[j]; j++) {
450 switch (cc = line[j]) {
458 sequence->reg[posr++] = cc-'a';
461 sequence->reg[posr++] = '.';
462 /* not 'genbank', but unspecified */
465 if (posr==sequence->seqlen) return 1;
466 if (posr ==0) {sequence->reg = NULL; return 1;}
467 /* A 2nd * indicates not pos. */
468 fprintf(stderr, "Erroneous .pos file!\n"); exit(-1);
474 fprintf(stderr,"Unable to parse input.\n");
479 return 1; /*** A '*' signals the end of the input seq for */
480 /*** non-FASTA format. */
486 printf("Unable to parse input %c.\n", line[i]);
488 } /* Ends the switch and the for loop */
490 if ( (res-sequence->seq)==MAXSEQLEN) { /* couldn't read whole seq. */
491 sequence->seqlen=MAXSEQLEN;
492 fprintf(stderr,"\nSequence %s was to long to read all of.",
497 sequence->seqlen = (res-sequence->seq);
498 return 1; /* If we read a 2nd > or and EOF then we have finished */
499 /* reading the seq. */
502 printf("Unexpected end of input.\n");
514 for (i = 0; aaname[i]; i++)
524 if (ch >= 0 && ch < strlen(aaname))
533 for (i = 0; posname[i]; i++)
534 if (ch == posname[i])
541 if (ch >= 0 && ch < strlen(posname))
547 FILE *sopen (char* name, char* ops)
556 /* Initialize set of popened files */
557 if (!popened_files) {
558 popened_files = &_popened_files;
559 FD_ZERO(popened_files);
562 if (!name) return NULL;
564 /* Maybe we user wants stdin or stdout */
565 if (!strcmp(name,"-")) {
566 if (!strcmp(ops,"r"))
568 if (!strcmp(ops,"w"))
572 /* If writing and filename starts with "|", use popen */
573 if (!strcmp(ops,"w") && name[0]=='|') {
574 NULLERR(fl = popen(&(name[1]), ops));
575 FD_SET(fileno(fl),popened_files);
579 if (name[1]=='/') { /* Manually find the home dir. */
580 strcpy(temp_name,getenv("HOME")); /* Get home directory. */
581 strcat(temp_name, &name[1]); /* Put it in place of ~ */
584 else { /* Convert ~usr into /location/usr */
585 strncpy(usr_name, &name[1], strcspn(&name[1],"/"));
586 /* Figures out length of usr name (between ~ and /) */
587 /* and copies the usrname into usr_name. Then we need */
588 /* to add end of string character. */
589 usr_name[strcspn(&name[1],"/")] = 0; /* End of string. */
591 strcpy(temp_name, getpwnam(usr_name)->pw_dir);
592 /* copies name of usr login dir to temp_name */
594 strcat(temp_name, &name[strcspn(name,"/")]); /* Replaces ~ with */
601 /* If we aren't reading, do the usual thing */
602 if (strcmp(ops,"r")) {
603 NULLERR(fl = fopen(name, ops));
606 /* If we're reading a .Z file, use zcat */
607 str_len = strlen(name);
609 ERROR("file name too long for internal buffer");
611 if (name[str_len-2]=='.' && name[str_len-1]=='Z') {
614 NULLERR(fl = popen(cmd, "r"));
615 FD_SET(fileno(fl),popened_files);
618 /* Otherwise, if the file exists, read it normally */
619 if (!access(name,F_OK)) {
620 NULLERR(fl = fopen(name, ops));
623 /* And if the file doesn't exist, try adding .Z */
627 if (!access(&(cmd[5]),F_OK)) {
628 NULLERR(fl = popen(cmd, "r"));
629 FD_SET(fileno(fl),popened_files);
632 /* The file does not exist. */
636 void sclose (FILE *fl)
638 if (FD_ISSET(fileno(fl),popened_files)) {
639 FD_CLR(fileno(fl),popened_files);