1 /***************************************************************************
3 fast2blc: A program to convert a FASTA format alignment file to an AMPS block file.
6 Copyright: Geoffrey J. Barton (1992,1997)
9 Please see the README file for details of conditions for use of this program.
11 $Id: fast2blc.c,v 1.3 1999/07/09 10:35:29 geoff Exp $
13 Revision 1.3 1999/07/09 10:35:29 geoff
14 Change version and copyright statement to reflect 1997 status
16 Revision 1.2 1998/09/17 16:55:01 geoff
17 Check consistency with archive
20 ****************************************************************************
22 Notes: This program can be run as a pipe: type fast2blc -q < input > output
23 Only error messages will be output to std_err
25 Default mode is interactive and prompts for filenames.
27 The storage for the sequences is allocated dynamically, so the MAX_SEQ_LEN
28 defines in the header file "defaults.h" have no effect. If a system memory
29 limit is reached, then a "malloc error" message will be written and the
30 program will stop. Most computers should happily cope with large numbers of
31 long sequences. If yours doesn't, some possible solutions are outlined in
32 the user manual - alscript.doc.
34 24 Oct 1994 - modify to remove dots and dashes from input file before writing
35 output file (if the -n option is the second argument).
37 19 April 1995 - fast2blc derived from clus2blc to read a FASTA style alignment
38 file. This assumes the title line is after the id code rather than on the next line.
41 ****************************************************************************/
/* Delimiter characters: space, tab and newline.
   NOTE(review): not referenced in the lines visible here (the strtok calls
   there pass literal delimiter strings) - confirm whether it is still used. */
53 #define TOKENS " \t\n"
56 main(int argc,char *argv[])
65 extern FILE *std_err,*std_in,*std_out;
70 int allen; /* total alignment length */
77 line = GJstrcreate(MAX_INLEN," ");
78 msffile = GJstrcreate(MAX_INLEN,NULL);
79 blocfile = GJstrcreate(MAX_INLEN,NULL);
88 if(strcmp(argv[1],"-q")==0){
89 /* Quiet mode - read FASTA file from stdin and output block file to stdout */
95 if(strcmp(argv[2],"-n")==0){
96 /* set flag to remove dots and dashes */
101 /* Verbose mode - prompt for all filenames */
102 fprintf(std_out,"\n\n");
103 fprintf(std_out,"FASTA format to AMPS Blockfile conversion\n");
104 fprintf(std_out,"Copyright: G. J. Barton (1992)\n");
105 fprintf(std_out,"Author: G. J. Barton (1992)\n\n");
106 fprintf(std_out,"Max number/length of alignment - Defined by System\n");
107 fprintf(std_out,"If you get a malloc error message - see manual\n\n");
108 fprintf(std_out,"Enter FASTA format alignment filename: ");
110 fscanf(std_in,"%s",msffile);
111 fprintf(std_out,"Opening: %s\n",msffile);
112 fp = GJfopen(msffile,"r",1);
114 fprintf(std_out,"Enter Block filename: ");
115 fscanf(std_in,"%s",blocfile);
116 fprintf(std_out,"Opening: %s\n",blocfile);
117 fout = GJfopen(blocfile,"w",1);
121 fprintf(fout,"Conversion of FASTA file to AMPS BLOCKFILE format\n");
122 fprintf(fout,"fast2blc: Geoffrey J. Barton (1992)\n\n");
124 seqs = (struct seqdat *) GJmalloc(sizeof(struct seqdat));
126 if(!quiet)fprintf(std_out,"Reading .fas file\n");
128 while(fgets(line,MAX_INLEN,fp) != NULL){
130 /* found an identifier */
131 token = strtok(&line[1]," -\n");
133 seqs = (struct seqdat *) GJrealloc(seqs,sizeof(struct seqdat) * (nseq + 1));
134 seqs[nseq].id = GJstrdup(token);
135 token = strtok(NULL,"\n");
137 /* assign the title line */
138 seqs[nseq].title = GJstrdup(token);
139 seqs[nseq].seq = GJstrcreate(MAX_SEQ_LEN,NULL);
141 seqs[nseq].seq = (char *) GJmalloc(sizeof(char));
143 while((c = fgetc(fp)) != '*'){
144 /* read characters until * */
145 if(isalpha(c) || c == '-' || c == '.'){
146 seqs[nseq].seq = (char *) GJrealloc(seqs[nseq].seq,sizeof(char) * (i+1));
147 seqs[nseq].seq[i] = c;
155 "Error: No title in sequence %s - is this FASTA format?\n",seqs[nseq].id);
159 if(i > allen) allen = i;
165 if(!quiet)fprintf(std_out,"All %d sequences read in\n",nseq);
166 if(!quiet)fprintf(std_out,"Writing .blc file\n");
169 fprintf(fout,">%s %s\n",seqs[i].id,seqs[i].title);
171 fprintf(fout,"* iteration 1\n");
172 for(i=0;i<allen;++i){
174 if(seqs[j].slen <= i){
175 fprintf(fout,"%c",' ');
177 /* edit out dots and dashes if required */
179 if(seqs[j].seq[i] == '.' || seqs[j].seq[i] == '-'){
180 seqs[j].seq[i] = ' ';
183 fprintf(fout,"%c",seqs[j].seq[i]);
189 if(!quiet)fprintf(std_out,"All done\n");
194 GJfree(seqs[i].title);