1 /***************************************************************************
3 clus2blc: A program to convert a CLUSTAL PIR format alignment file to an AMPS blockfile.
6 Copyright: Geoffrey J. Barton (1992,1997)
9 Please see the README file for details of conditions
10 for use of this program.
12 $Id: clus2blc.c,v 1.3 1999/07/09 10:35:28 geoff Exp $
14 Revision 1.3 1999/07/09 10:35:28 geoff
15 Change version and copyright statement to reflect 1997 status
17 Revision 1.2 1998/09/17 16:55:00 geoff
18 Check consistency with archive
22 ****************************************************************************
24 Notes: This program can be run as a pipe: type clus2blc -q < input > output
25 Only error messages will be output to std_err
27 Default mode is interactive and prompts for filenames.
29 The storage for the sequences is allocated dynamically, so the MAX_SEQ_LEN
30 defines in the header file "defaults.h" have no effect. If a system memory
31 limit is reached, then a "malloc error" message will be written and the
32 program will stop. Most computers should happily cope with large numbers of
33 long sequences. If yours doesn't, some possible solutions are outlined in
34 the user manual - alscript.doc.
36 24 Oct 1994 - modified to remove dots and dashes read from the input file before
37 writing the output file (if the -n option is the second argument).
39 ****************************************************************************/
51 #define TOKENS " \t\n" /* space, tab, newline -- presumably a strtok() delimiter set; not referenced in the visible code (TODO confirm) */
54 main(int argc,char *argv[])
63 extern FILE *std_err,*std_in,*std_out;
68 int allen; /* total alignment length */
75 line = GJstrcreate(MAX_INLEN," ");
76 msffile = GJstrcreate(MAX_INLEN,NULL);
77 blocfile = GJstrcreate(MAX_INLEN,NULL);
86 if(strcmp(argv[1],"-q")==0){
87 /* Quiet mode - read CLUSTAL NBRF-PIR alignment from stdin and output block file to stdout */
93 if(strcmp(argv[2],"-n")==0){
94 /* set flag to remove dots */
99 /* Verbose mode - prompt for all filenames */
100 fprintf(std_out,"\n\n");
101 fprintf(std_out,"CLUSTAL NBRF-PIR format to AMPS Blockfile conversion\n");
102 fprintf(std_out,"Copyright: G. J. Barton (1992)\n");
103 fprintf(std_out,"Author: G. J. Barton (1992)\n\n");
104 fprintf(std_out,"Max number/length of alignment - Defined by System\n");
105 fprintf(std_out,"If you get a malloc error message - see manual\n\n");
106 fprintf(std_out,"Enter CLUSTAL NBRF-PIR alignment filename: ");
108 fscanf(std_in,"%s",msffile);
109 fprintf(std_out,"Opening: %s\n",msffile);
110 fp = GJfopen(msffile,"r",1);
112 fprintf(std_out,"Enter Block filename: ");
113 fscanf(std_in,"%s",blocfile);
114 fprintf(std_out,"Opening: %s\n",blocfile);
115 fout = GJfopen(blocfile,"w",1);
119 fprintf(fout,"Conversion of CLUSTAL NBRF-PIR file to AMPS BLOCKFILE format\n");
120 fprintf(fout,"clus2blc: Geoffrey J. Barton (1992)\n\n");
122 seqs = (struct seqdat *) GJmalloc(sizeof(struct seqdat));
124 if(!quiet)fprintf(std_out,"Reading .pir file\n");
126 while(fgets(line,MAX_INLEN,fp) != NULL){
128 /* found an identifier */
129 token = strtok(&line[1]," \n");
131 seqs = (struct seqdat *) GJrealloc(seqs,sizeof(struct seqdat) * (nseq + 1));
132 seqs[nseq].id = GJstrdup(token);
133 if(fgets(line,MAX_INLEN,fp) != NULL){
134 /* read the title line */
135 seqs[nseq].title = GJstrdup(line);
136 seqs[nseq].seq = GJstrcreate(MAX_SEQ_LEN,NULL);
138 seqs[nseq].seq = (char *) GJmalloc(sizeof(char));
140 while((c = fgetc(fp)) != '*'){
141 /* read characters until * */
142 if(isalpha(c) || c == '-' || c == '.'){
143 seqs[nseq].seq = (char *) GJrealloc(seqs[nseq].seq,sizeof(char) * (i+1));
144 seqs[nseq].seq[i] = c;
152 if(i > allen) allen = i;
158 if(!quiet)fprintf(std_out,"All %d sequences read in\n",nseq);
159 if(!quiet)fprintf(std_out,"Writing .blc file\n");
162 fprintf(fout,">%s %s",seqs[i].id,seqs[i].title);
164 fprintf(fout,"* iteration 1\n");
165 for(i=0;i<allen;++i){
167 if(seqs[j].slen <= i){
168 fprintf(fout,"%c",' ');
170 /* edit out dots and dashes if required */
172 if(seqs[j].seq[i] == '.' || seqs[j].seq[i] == '-'){
173 seqs[j].seq[i] = ' ';
176 fprintf(fout,"%c",seqs[j].seq[i]);
182 if(!quiet)fprintf(std_out,"All done\n");
187 GJfree(seqs[i].title);