1 /***************************************************************************
3 msf2blc: A program to convert a GCG .MSF file into an AMPS blockfile.
5 Copyright: Geoffrey J. Barton (1992,1997)
8 Please see the README file for details of conditions for use of this program.
10 $Id: msf2blc.c,v 1.3 1999/07/09 10:35:29 geoff Exp $
12 Revision 1.3 1999/07/09 10:35:29 geoff
13 Change version and copyright statement to reflect 1997 status
15 Revision 1.2 1998/09/17 16:55:06 geoff
16 Check consistency with archive
19 ****************************************************************************
21 Notes: This program can be run as a pipe: type msf2blc -q < input > output.
22 In this mode the only messages produced are error messages, written to std_err.
24 Default mode is interactive and prompts for filenames.
26 The storage for the sequences is allocated dynamically, so the MAX_SEQ_LEN
27 defines in the header file "defaults.h" have no effect. If a system memory
28 limit is reached, then a "malloc error" message will be written and the
29 program will stop. Most computers should happily cope with large numbers of
30 long sequences. Some possible solutions to this problem are outlined in
31 the user manual - alscript.doc
33 24 October 1994: Added -n option to delete '.' characters from the alignment if found.
35 ****************************************************************************/
47 #define TOKENS " \t\n"
50 main(int argc,char *argv[])
59 extern FILE *std_err,*std_in,*std_out;
72 line = GJstrcreate(MAX_INLEN," ");
73 msffile = GJstrcreate(MAX_INLEN,NULL);
74 blocfile = GJstrcreate(MAX_INLEN,NULL);
82 if(strcmp(argv[1],"-q")==0){
83 /* Quiet mode - read .MSF file from stdin and output block file to stdout */
89 if(strcmp(argv[2],"-n")==0){
90 /* set flag to remove dots */
95 /* Verbose mode - prompt for all filenames */
96 fprintf(std_out,"\n\n");
97 fprintf(std_out,"GCG .MSF to AMPS Blockfile conversion\n");
98 fprintf(std_out,"Copyright: G. J. Barton (1992)\n");
99 fprintf(std_out,"Author: G. J. Barton (1992)\n\n");
100 fprintf(std_out,"Max number/length of alignment - Defined by System\n");
101 fprintf(std_out,"If you get a malloc error message - see manual\n\n");
102 fprintf(std_out,"Enter MSF filename: ");
104 fscanf(std_in,"%s",msffile);
105 fprintf(std_out,"Opening: %s\n",msffile);
106 fp = GJfopen(msffile,"r",1);
108 fprintf(std_out,"Enter Block filename: ");
109 fscanf(std_in,"%s",blocfile);
110 fprintf(std_out,"Opening: %s\n",blocfile);
111 fout = GJfopen(blocfile,"w",1);
115 fprintf(fout,"Conversion of GCG .MSF file to AMPS BLOCKFILE format\n");
116 fprintf(fout,"msf2blc: Geoffrey J. Barton (1992)\n\n");
118 seqs = (struct seqdat *) GJmalloc(sizeof(struct seqdat));
120 if(!quiet)fprintf(std_out,"Reading .msf file\n");
121 while(fgets(line,MAX_INLEN,fp) != NULL){
123 token = strtok(line,TOKENS);
125 if(strcmp(token,"Name:") == 0){
126 /* This is a seq id name */
127 token = strtok(NULL,TOKENS);
128 seqs = (struct seqdat *) GJrealloc(seqs,sizeof(struct seqdat) * (nseq +1));
129 seqs[nseq].id = GJstrdup(token);
130 seqs[nseq].title = GJstrdup(line);
132 seqs[nseq].seq = (char *) GJmalloc(sizeof(char));
134 if(!quiet)fprintf(std_out,"%s\n",seqs[nseq-1].id);
135 }else if((strcmp(token,"//") == 0) || found){
136 /* this signals the end of identifiers so process sequences*/
139 /* find out which seq this is */
142 if(strcmp(token,seqs[i].id) == 0){
146 /* read in the sequence */
148 token = strtok(NULL,"\n");
150 GJerror("Cannot find sequence in line");
151 fprintf(std_err,"%s",line);
155 while(token[j] != '\0'){
156 if(isalpha(token[j]) || token[j] == '.'){
157 seqs[i].seq = (char *) GJrealloc(seqs[i].seq,sizeof(char) * (seqs[i].slen +1));
158 seqs[i].seq[seqs[i].slen] = token[j];
166 /* this is a comment line - just echo */
167 fprintf(fout,"%s\n",line);
172 if(!quiet)fprintf(std_out,"All %d sequences read in\n",nseq);
173 if(!quiet)fprintf(std_out,"Writing .blc file\n");
176 fprintf(fout,">%s %s\n",seqs[i].id,seqs[i].title);
178 fprintf(fout,"* iteration 1\n");
179 for(i=0;i<seqs[0].slen;++i){
181 /* edit out dots if required */
183 if(seqs[j].seq[i] == '.'){
184 seqs[j].seq[i] = ' ';
187 fprintf(fout,"%c",seqs[j].seq[i]);
192 if(!quiet)fprintf(std_out,"All done\n");
197 GJfree(seqs[i].title);