1 /************************************************************
2 * HMMER - Biological sequence analysis with profile HMMs
3 * Copyright (C) 1992-1999 Washington University School of Medicine
6 * This source code is distributed under the terms of the
7 * GNU General Public License. See the files COPYING and LICENSE
9 ************************************************************/
12 * SRE, Thu Oct 30 08:56:22 1997; St. Louis
14 * main() for converting between HMM file formats, and
15 * for converting HMMs to other software formats like GCG profiles.
17 * CVS $Id: hmmconvert.c,v 1.1.1.1 2005/03/22 08:33:58 cmzmasek Exp $
23 #include "structs.h" /* data structures, macros, #define's */
24 #include "config.h" /* compile-time configuration constants */
25 #include "funcs.h" /* function declarations */
26 #include "globals.h" /* alphabet global variables */
27 #include "squid.h" /* general sequence analysis library */
/* One-line program description; main() prints it through Banner() on stdout. */
29 static char banner[] = "hmmconvert - convert between profile HMM file formats";
/* Command-line synopsis and option summary.
 * main() passes this text to Getopt() and also appends it to the Die()
 * messages issued for a wrong argument count and for an input HMM file
 * that cannot be opened. The switches listed here are the same seven
 * that are registered in OPTIONS[].
 */
31 static char usage[] = "\
32 Usage: hmmconvert [-options] <old hmm file> <new hmm file>\n\
33 Available options are:\n\
34 -h : help; print brief help on version and usage\n\
36 -a : convert to HMMER ASCII file (the default)\n\
37 -b : convert to HMMER binary file\n\
38 -p : convert to GCG Profile .prf format\n\
39 -P : convert to Compugen extended .eprf profile format\n\
41 -A : append mode; append to <new hmm file>\n\
42 -F : force mode; allow overwriting of existing files\n\
45 static char experts[] = "\
49 static struct opt_s OPTIONS[] = {
  /* Switch table handed to Getopt() together with NOPTIONS; main() then
   * dispatches on the returned option name with strcmp().
   * Each entry starts with the switch name. All seven entries use TRUE
   * for the middle field and sqdARG_NONE for the last one.
   * NOTE(review): struct opt_s is declared outside this file; sqdARG_NONE
   * presumably means "switch takes no argument" and the middle field's
   * meaning should be confirmed against that declaration.
   */
50 { "-a", TRUE, sqdARG_NONE },
51 { "-b", TRUE, sqdARG_NONE },
52 { "-h", TRUE, sqdARG_NONE },
53 { "-p", TRUE, sqdARG_NONE },
54 { "-A", TRUE, sqdARG_NONE },
55 { "-F", TRUE, sqdARG_NONE },
56 { "-P", TRUE, sqdARG_NONE },
/* Number of entries in OPTIONS[], derived from the array's size so it
 * follows the table automatically; passed to Getopt() as the table length.
 */
58 #define NOPTIONS (sizeof(OPTIONS) / sizeof(struct opt_s))
61 main(int argc, char **argv)
63 char *infile; /* name of input HMM file */
64 char *outfile; /* name of output HMM file */
65 HMMFILE *infp; /* input HMM file ptr */
66 FILE *outfp; /* output HMM file ptr */
67 char *mode; /* mode to open file in */
68 struct plan7_s *hmm; /* a profile HMM structure */
69 int nhmm; /* number of HMMs converted */
71 char *optname; /* name of option found by Getopt() */
72 char *optarg; /* argument found by Getopt() */
73 int optind; /* index in argv[] */
75 int do_append; /* TRUE to append to existing outfile */
76 int do_force; /* TRUE to allow overwriting */
77 enum hmmfmt_e { P7ASCII, P7BINARY, GCGPROFILE, BICPROFILE }
78 outfmt; /* output format */
80 /***********************************************
82 ***********************************************/
88 while (Getopt(argc, argv, OPTIONS, NOPTIONS, usage,
89 &optind, &optname, &optarg)) {
90 if (strcmp(optname, "-a") == 0) { outfmt = P7ASCII; }
91 else if (strcmp(optname, "-b") == 0) { outfmt = P7BINARY; }
92 else if (strcmp(optname, "-p") == 0) { outfmt = GCGPROFILE; }
93 else if (strcmp(optname, "-A") == 0) { do_append = TRUE; }
94 else if (strcmp(optname, "-F") == 0) { do_force = TRUE; }
95 else if (strcmp(optname, "-P") == 0) { outfmt = BICPROFILE; }
96 else if (strcmp(optname, "-h") == 0) {
97 Banner(stdout, banner);
103 if (argc - optind != 2)
104 Die("Incorrect number of arguments.\n%s\n", usage);
106 infile = argv[optind++];
107 outfile = argv[optind++];
109 /***********************************************
110 * Open input HMM database (might be in HMMERDB or current directory)
111 ***********************************************/
113 if ((infp = HMMFileOpen(infile, "HMMERDB")) == NULL)
114 Die("Failed to open HMM database %s\n%s", infile, usage);
116 /***********************************************
117 * Open output HMM file
118 ***********************************************/
121 { /* If we're appending to a file, it needs to be Plan7 format */
124 if (FileExists(outfile)) {
125 test = HMMFileOpen(outfile, NULL);
127 Die("%s not an HMM file; I refuse to append to it; using stdout instead",
130 /* bug #14 fix. 12/24/00, xref STL3 p.133. */
131 if (test->is_binary && outfmt != P7BINARY)
132 Die("File %s is in Plan 7 binary format; must append the same fmt.", outfile);
133 else if (! test->is_binary && outfmt != P7ASCII)
134 Die("File %s is in Plan 7 ASCII format; must append the same fmt.", outfile);
139 case P7ASCII: mode = "a"; break;
140 case P7BINARY: mode = "ab"; break;
141 case GCGPROFILE: Die("You cannot append GCG profiles");
142 case BICPROFILE: Die("You cannot append Compugen extended profiles");
143 default: Die("unexpected format");
147 { /* else, we're writing a new file */
148 if (! do_force && FileExists(outfile))
149 Die("Output HMM file %s already exists. Please rename or delete it.", outfile);
151 case P7ASCII: mode = "w"; break;
152 case P7BINARY: mode = "wb"; break;
153 case GCGPROFILE: mode = "w"; break;
154 case BICPROFILE: mode = "w"; break;
155 default: Die("unexpected format");
158 if ((outfp = fopen(outfile, mode)) == NULL)
159 Die("Failed to open output file %s for writing", outfile);
161 /***********************************************
163 ***********************************************/
165 Banner(stdout, banner);
166 printf( "Input HMM file: %s\n", infile);
167 printf( "Output HMM file: %s\n", outfile);
168 printf( "Converting to: ");
170 case P7ASCII: puts("HMMER Plan7 ASCII"); break;
171 case P7BINARY: puts("HMMER Plan7 binary"); break;
172 case GCGPROFILE: puts("GCG Profile .prf"); break;
173 case BICPROFILE: puts("Compugen .eprf profile"); break;
174 default: Die("unexpected fault");
176 printf("- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n\n");
178 /***********************************************
180 ***********************************************/
183 while (HMMFileRead(infp, &hmm)) {
185 Die("HMM file %s may be corrupt or in incorrect format; parse failed", infile);
188 case P7ASCII: WriteAscHMM(outfp, hmm); break;
189 case P7BINARY: WriteBinHMM(outfp, hmm); break;
190 case GCGPROFILE: WriteProfile(outfp, hmm, FALSE); break;
191 case BICPROFILE: WriteProfile(outfp, hmm, TRUE); break;
192 default: Die("unexpected format");
195 printf(" - converted %s\n", hmm->name);
199 printf("\n%d HMM(s) converted and written to %s\n", nhmm, outfile);
201 /***********************************************
203 ***********************************************/