1 /*****************************************************************
2 * HMMER - Biological sequence analysis with profile HMMs
3 * Copyright (C) 1992-1999 Washington University School of Medicine
6 * This source code is distributed under the terms of the
7 * GNU General Public License. See the files COPYING and LICENSE
9 *****************************************************************/
11 /* sindex_main.c, SRE, Fri Feb 16 08:38:39 2001 [St. Louis]
13 * sindex -- create SSI index of sequence file(s) for sfetch
15 * CVS $Id: sindex_main.c,v 1.1.1.1 2005/03/22 08:34:31 cmzmasek Exp $
/* One-line program description; main() passes it to Banner() when -h is given. */
static char banner[] =
  "sindex - create SSI index of sequence file(s) for sfetch";
25 static char usage[] = "\
26 Usage: sindex [-options] <seqfile>...\n\
28 -h : help; print version and usage info.\n\
29 -o <f> : output the SSI index to file named <f>\n\
32 static char experts[] = "\
33 --informat <s> : specify input sequence file format <s>\n\
36 struct opt_s OPTIONS[] = {
37 { "-h", TRUE, sqdARG_NONE },
38 { "-o", TRUE, sqdARG_STRING },
39 { "--64", FALSE< sqdARG_NONE },
40 { "--informat", FALSE, sqdARG_STRING },
42 #define NOPTIONS (sizeof(OPTIONS) / sizeof(struct opt_s))
45 main(int argc, char **argv)
47 char *file; /* name of a sequence file */
48 SQFILE *sfp; /* open sequence file */
49 int format; /* forced sequence file format, if any */
50 int mode; /* SSI_OFFSET_I32 or SSI_OFFSET_I64 */
51 int idx; /* counter over files */
52 int status; /* return status from an SSI call */
53 SSIINDEX *ssi; /* the index we're creating */
54 char *ssifile; /* file name for the SSI index */
55 int fh; /* handle on current file */
56 char *seq; /* a sequence read from the file */
57 SQINFO sqinfo; /* info on the sequence */
63 /***********************************************
64 * Parse the command line
65 ***********************************************/
67 /* initializations and defaults */
68 format = SQFILE_UNKNOWN; /* autodetecting format is the default */
69 mode = SSI_OFFSET_I32; /* default = 32 bit mode */
70 ssifile = NULL; /* default: set SSI file name as <file>.ssi */
72 while (Getopt(argc, argv, OPTIONS, NOPTIONS, usage,
73 &optind, &optname, &optarg))
75 if (strcmp(optname, "--64") == 0) mode = SSI_OFFSET_I64;
76 else if (strcmp(optname, "-o") == 0) ssifile = sre_strdup(optarg, -1);
77 else if (strcmp(optname, "--informat") == 0) {
78 format = String2SeqfileFormat(optarg);
79 if (format == SQFILE_UNKNOWN)
80 Die("unrecognized input sequence file format \"%s\"", optarg);
82 else if (strcmp(optname, "-h") == 0) {
83 Banner(stdout, banner);
90 if (argc - optind < 1)
91 Die("Incorrect number of command line arguments.\n%s\n", usage);
94 /*****************************************************************
96 *****************************************************************/
98 /* Determine whether we'll index in 32-bit or 64-bit mode.
99 * 32-bit is default, but 64-bit trumps; if any file needs 64-bit,
100 * we index them all that way.
102 for (idx = optind; idx < argc; idx++)
105 if ((status = SSIRecommendMode(file)) == -1)
106 Die("Couldn't stat %s - file doesn't exist, or is too big", file);
107 if (status == SSI_OFFSET_I64) mode = SSI_OFFSET_I64;
110 if (ssifile == NULL) {
111 ssifile = sre_strdup(file, -1);
112 sre_strcat(&ssifile, -1, ".ssi", -1);
115 if ((ssi = SSICreateIndex(mode)) == NULL)
116 Die("Couldn't allocate/initialize the new SSI index\n");
118 /*****************************************************************
119 * Go through the files one at a time and compile index.
120 *****************************************************************/
122 for (idx = optind; idx < argc; idx++)
125 printf("Working on file %s... \t", file);
128 if ((sfp = SeqfileOpenForIndexing(file, format, NULL, mode)) == NULL)
129 Die("Failed to open sequence file %s for reading", file);
131 if ((status = SSIAddFileToIndex(ssi, file, sfp->format, &fh)) != 0)
132 Die("SSI error: %s\n", SSIErrorString(status));
134 while (ReadSeq(sfp, sfp->format, &seq, &sqinfo)) {
135 if ((status = SSIAddPrimaryKeyToIndex(ssi, sqinfo.name, fh,
136 &(sfp->r_off), &(sfp->d_off),
138 Die("SSI error: %s\n", SSIErrorString(status));
141 if (mode == SSI_OFFSET_I32)
142 SQD_DPRINTF2(("Added primary key %s: r_off=%d, d_off=%d len=%d\n",
143 sqinfo.name, sfp->r_off.off.i32,
144 sfp->d_off.off.i32, sqinfo.len));
146 SQD_DPRINTF2(("Added primary key %s: r_off=%lld, d_off=%lld len=%d\n",
147 sqinfo.name, sfp->r_off.off.i64, sfp->d_off.off.i64,
151 if (sqinfo.flags & SQINFO_ACC) {
152 if ((status = SSIAddSecondaryKeyToIndex(ssi, sqinfo.acc, sqinfo.name)) != 0)
153 Die("SSI error: %s\n", SSIErrorString(status));
156 FreeSequence(seq, &sqinfo);
158 if (sfp->bpl > 0 && sfp->rpl > 0) {
159 if ((status = SSISetFileForSubseq(ssi, fh, sfp->bpl, sfp->rpl)) != 0)
160 Die("SSI error: %s\n", SSIErrorString(status));
161 printf("FAST_SUBSEQ set...\t");
168 printf("Sorting and writing index to SSI file %s...\t", ssifile);
170 if ((status = SSIWriteIndex(ssifile, ssi)) != 0)
171 Die("SSIWriteIndex() failed: %s", SSIErrorString(status));
174 printf("%s:\n", ssifile);
176 mode == SSI_OFFSET_I32 ? "32-bit" : "64-bit");
177 printf("Files: %d\n", ssi->nfiles);
178 printf("Primary keys: %d\n", ssi->nprimary);
179 printf("Secondary keys: %d\n", ssi->nsecondary);