/*************************************************************************** clus2blc: A program to convert a CLUSTAL PIR format alignment file to an AMPS blockfile. Copyright: Geoffrey J. Barton (1992,1997) email: geoff@ebi.ac.uk Please see the README file for details of conditions for use of this program. $Id: clus2blc.c,v 1.3 1999/07/09 10:35:28 geoff Exp $ $Log: clus2blc.c,v $ Revision 1.3 1999/07/09 10:35:28 geoff Change version and copyright statement to reflect 1997 status Revision 1.2 1998/09/17 16:55:00 geoff Check consistency with archive **************************************************************************** Notes: This program can be run as a pipe: type clus2blc -q < input > output In this mode only error messages will be output, to std_err. The default mode is interactive and prompts for filenames. The storage for the sequences is allocated dynamically, so the MAX_SEQ_LEN defines in the header file "defaults.h" have no effect. If a system memory limit is reached, then a "malloc error" message will be written and the program will stop. Most computers should happily cope with large numbers of long sequences. If yours doesn't, some possible solutions are outlined in the user manual - alscript.doc. 24 Oct 1994 - modified to remove dots and dashes from the input file before writing the output file (if the -n option is the second argument).
****************************************************************************/ #include #include #include #include #include #include "gjutil.h" #include "array.h" #include "defaults.h" #define TOKENS " \t\n" main(int argc,char *argv[]) { struct seqdat *seqs; FILE *fp,*fout; int nseq; int found; int i,j; char *token,*sbit; char *line; extern FILE *std_err,*std_in,*std_out; char *msffile; char *blocfile; int quiet; char c; int allen; /* total alignment length */ int nodot; std_err = stderr; std_in = stdin; std_out = stdout; line = GJstrcreate(MAX_INLEN," "); msffile = GJstrcreate(MAX_INLEN,NULL); blocfile = GJstrcreate(MAX_INLEN,NULL); nseq = 0; found = 0; quiet = 0; allen = 0; nodot = 0; if(argc > 1){ if(strcmp(argv[1],"-q")==0){ /* Quiet mode - read .MSF file from stdin and output block file to stdout */ quiet = 1; fp = std_in; fout = std_out; } if(argc > 2){ if(strcmp(argv[2],"-n")==0){ /* set flag to remove dots */ nodot = 1; } } }else{ /* Verbose mode - prompt for all filenames */ fprintf(std_out,"\n\n"); fprintf(std_out,"CLUSTAL NBRF-PIR format to AMPS Blockfile conversion\n"); fprintf(std_out,"Copyright: G. J. Barton (1992)\n"); fprintf(std_out,"Author: G. J. Barton (1992)\n\n"); fprintf(std_out,"Max number/length of alignment - Defined by System\n"); fprintf(std_out,"If you get a malloc error message - see manual\n\n"); fprintf(std_out,"Enter CLUSTAL NBRF-PIR alignment filename: "); fscanf(std_in,"%s",msffile); fprintf(std_out,"Opening: %s\n",msffile); fp = GJfopen(msffile,"r",1); fprintf(std_out,"Enter Block filename: "); fscanf(std_in,"%s",blocfile); fprintf(std_out,"Opening: %s\n",blocfile); fout = GJfopen(blocfile,"w",1); } fprintf(fout,"\n"); fprintf(fout,"Conversion of CLUSTAL NBRF-PIR file to AMPS BLOCKFILE format\n"); fprintf(fout,"clus2blc: Geoffrey J. 
Barton (1992)\n\n"); seqs = (struct seqdat *) GJmalloc(sizeof(struct seqdat)); if(!quiet)fprintf(std_out,"Reading .pir file\n"); nseq = 0; while(fgets(line,MAX_INLEN,fp) != NULL){ if(line[0] == '>'){ /* found an identifier */ token = strtok(&line[1]," \n"); if(token != NULL){ seqs = (struct seqdat *) GJrealloc(seqs,sizeof(struct seqdat) * (nseq + 1)); seqs[nseq].id = GJstrdup(token); if(fgets(line,MAX_INLEN,fp) != NULL){ /* read the title line */ seqs[nseq].title = GJstrdup(line); seqs[nseq].seq = GJstrcreate(MAX_SEQ_LEN,NULL); seqs[nseq].slen = 0; seqs[nseq].seq = (char *) GJmalloc(sizeof(char)); i=0; while((c = fgetc(fp)) != '*'){ /* read characters until * */ if(isalpha(c) || c == '-' || c == '.'){ seqs[nseq].seq = (char *) GJrealloc(seqs[nseq].seq,sizeof(char) * (i+1)); seqs[nseq].seq[i] = c; ++i; }else if(c == EOF){ break; } } } seqs[nseq].slen = i; if(i > allen) allen = i; ++nseq; } } } if(!quiet)fprintf(std_out,"All %d sequences read in\n",nseq); if(!quiet)fprintf(std_out,"Writing .blc file\n"); for(i=0;i%s %s",seqs[i].id,seqs[i].title); } fprintf(fout,"* iteration 1\n"); for(i=0;i