1 /*****************************************************************
2 * HMMER - Biological sequence analysis with profile HMMs
3 * Copyright (C) 1992-1999 Washington University School of Medicine
6 * This source code is distributed under the terms of the
7 * GNU General Public License. See the files COPYING and LICENSE
9 *****************************************************************/
15 /* Database indexing (SSI format support)
16 * CVS $Id: ssi.h,v 1.1.1.1 2005/03/22 08:34:21 cmzmasek Exp $
18 * See: ssi_format.tex in Docs/ */
/* Size limits for an index. */
26 #define SSI_MAXFILES 32767 /* 2^15-1; presumably the limit reported via SSI_ERR_TOOMANY_FILES -- confirm in ssi.c */
27 #define SSI_MAXKEYS 2147483647L /* 2^31-1, a long constant; presumably the limit reported via SSI_ERR_TOOMANY_KEYS -- confirm in ssi.c */
30 /* Use the union to save space, since the two offset types are
31 * mutually exclusive, controlled by "mode" */
34 char mode; /* selects which offset member is in use: SSI_OFFSET_I32 or SSI_OFFSET_I64 */
36 sqd_uint32 i32; /* an offset that fseek() can use */
37 sqd_uint64 i64; /* an offset that e.g. fseeko64() can use */
/* SSIOFFSET is the public name for struct ssioffset_s; the two macros are the
 * values its "mode" member takes.
 */
40 typedef struct ssioffset_s SSIOFFSET;
41 #define SSI_OFFSET_I32 0 /* mode value: the 32-bit offset member (i32) is the one in use */
42 #define SSI_OFFSET_I64 1 /* mode value: the 64-bit offset member (i64) is the one in use */
45 /* xref: SSI API documentation in ssi-format.tex */
/* NOTE(review): these look like the members of struct ssifile_s (typedef'd to
 * SSIFILE below); the struct's opening and closing lines are not present in
 * this listing -- confirm against the original header.
 */
48 FILE *fp; /* open SSI index file */
49 sqd_uint32 flags; /* optional behavior flags (see SSI_USE64, SSI_USE64_INDEX) */
50 sqd_uint16 nfiles; /* number of files = 16 bit int */
51 sqd_uint32 nprimary; /* number of primary keys */
52 sqd_uint32 nsecondary; /* number of secondary keys */
53 sqd_uint32 flen; /* length of filenames (inc '\0') */
54 sqd_uint32 plen; /* length of primary keys (inc '\0') */
55 sqd_uint32 slen; /* length of secondary keys (inc '\0') */
56 sqd_uint32 frecsize; /* # bytes in a file record */
57 sqd_uint32 precsize; /* # bytes in a primary key record */
58 sqd_uint32 srecsize; /* # bytes in a secondary key record */
59 SSIOFFSET foffset; /* disk offset, start of file records */
60 SSIOFFSET poffset; /* disk offset, start of pri key recs */
61 SSIOFFSET soffset; /* disk offset, start of sec key recs */
63 char imode; /* mode for index file offsets, 32 v. 64 bit (presumably SSI_OFFSET_I32 or SSI_OFFSET_I64 -- confirm) */
64 char smode; /* mode for sequence file offsets, 32 v. 64 bit (presumably SSI_OFFSET_I32 or SSI_OFFSET_I64 -- confirm) */
68 char **filename; /* list of file names [0..nfiles-1] */
69 sqd_uint32 *fileformat; /* file formats (presumably one per file, [0..nfiles-1] -- confirm) */
70 sqd_uint32 *fileflags; /* optional per-file behavior flags (see SSI_FAST_SUBSEQ) */
71 sqd_uint32 *bpl; /* bytes per line in file */
72 sqd_uint32 *rpl; /* residues per line in file */
74 typedef struct ssifile_s SSIFILE; /* public name for the index handle */
/* Optional per-index behavior flags in SSIFILE structure's flags.
 * Each flag is a single bit. The expansions are parenthesized so a flag
 * keeps its value when it appears inside a larger expression: without the
 * parentheses, `SSI_USE64_INDEX + 1` would expand to `1<<1+1`, which is 4.
 */
#define SSI_USE64        (1<<0)  /* seq offsets are 64-bit */
#define SSI_USE64_INDEX  (1<<1)  /* index file offsets are 64-bit */
/* Optional per-file behavior flags in fileflags.
 * The expansion is parenthesized so the flag keeps its value when it appears
 * inside a larger expression (e.g. next to `*`, `+` or `!`).
 */
#define SSI_FAST_SUBSEQ  (1<<0)  /* can do subseq lookup in this file */
85 /* Structure: SSIINDEX
87 * Used when building up an index and writing it to disk */
/* Key records held while an index is being built.
 * NOTE(review): the closing lines of both structs are not present in this
 * listing -- confirm against the original header.
 */
89 struct ssipkey_s { /* Primary key data: */
90 char *key; /* key name */
91 sqd_uint16 fnum; /* file number */
92 SSIOFFSET r_off; /* record offset */
93 SSIOFFSET d_off; /* data offset */
94 sqd_uint32 len; /* sequence length */
95 sqd_uint32 handle; /* handle on this key */
97 struct ssiskey_s { /* Secondary key data: */
98 char *key; /* secondary key name */
99 char *pkey; /* primary key name (presumably the primary key this secondary key is an alias for -- confirm) */
/* NOTE(review): these look like members of struct ssiindex_s (the index under
 * construction); the struct's opening line and several member lines are not
 * present in this listing -- confirm against the original header.
 */
102 int smode; /* sequence mode: SSI_OFFSET_I32 or _I64 */
103 int imode; /* index mode: SSI_OFFSET_I32 or _I64 */
106 sqd_uint32 *fileformat; /* presumably the format code of each indexed file -- TODO confirm */
109 sqd_uint32 flen; /* length of longest filename, inc '\0' */
112 struct ssipkey_s *pkeys; /* primary key records; presumably an array -- TODO confirm */
113 sqd_uint32 plen; /* length of longest pkey, including '\0' */
115 sqd_uint32 tot_primary; /* NOTE(review): presumably the total primary key count across all chunks -- confirm */
117 struct ssiskey_s *skeys; /* secondary key records; presumably an array -- TODO confirm */
118 sqd_uint32 slen; /* length of longest skey, including '\0' */
119 sqd_uint32 nsecondary; /* presumably the number of secondary keys currently held -- TODO confirm */
120 sqd_uint32 tot_secondary; /* NOTE(review): presumably the total secondary key count across all chunks -- confirm */
122 /* The following stuff is for creating really big indexes, where
123 * we have to write a tmp file to disk with multiple chunks, then
124 * mergesort the chunks. */
/* Temporary-file state for indexes that are written to disk in chunks. */
126 char *tmpbase; /* root name of tmp files: <tmpbase>.t1 and .t2 */
127 FILE *t1; /* open tmp file for collecting chunks */
128 fpos_t *chunkoffset; /* array of offsets to individual chunks; 0..nchunks-1 (fpos_t is the position type of fgetpos()/fsetpos()) */
129 int nchunks; /* total # of chunks in t1 */
130 int max_chunk_size; /* maximum size of chunk to hold in memory at one time, in MB */
132 typedef struct ssiindex_s SSIINDEX; /* public name for the index-under-construction type */
134 /* These control malloc and realloc chunk sizes in the index */
137 #define SSI_FILE_BLOCK 10 /* presumably the allocation increment, in entries, for per-file arrays -- TODO confirm in ssi.c */
138 #define SSI_KEY_BLOCK 100 /* presumably the allocation increment, in entries, for key arrays -- TODO confirm in ssi.c */
140 /* Error codes set by the API */
143 #define SSI_ERR_NODATA 1 /* no data? an fread() failed */
144 #define SSI_ERR_NO_SUCH_KEY 2 /* that key's not in the index */
145 #define SSI_ERR_MALLOC 3 /* presumably: a memory allocation failed -- confirm in ssi.c */
146 #define SSI_ERR_NOFILE 4 /* no such file? an fopen() failed */
147 #define SSI_ERR_BADMAGIC 5 /* magic number mismatch in SSIOpen() */
148 #define SSI_ERR_BADFORMAT 6 /* didn't read what I expected to fread() */
149 #define SSI_ERR_NO64BIT 7 /* needed 64-bit support and didn't have it */
150 #define SSI_ERR_SEEK_FAILED 8 /* an fseek() (or similar) failed */
151 #define SSI_ERR_TELL_FAILED 9 /* an ftell() (or similar) failed */
152 #define SSI_ERR_NO_SUBSEQS 10 /* fast subseq is disallowed */
153 #define SSI_ERR_RANGE 11 /* subseq requested is out of range */
154 #define SSI_ERR_BADARG 12 /* something wrong with a function argument */
156 #define SSI_ERR_TOOMANY_FILES 13 /* ran out of range for files in an index */
157 #define SSI_ERR_TOOMANY_KEYS 14 /* ran out of range for keys in an index */
158 #define SSI_ERR_FWRITE 15 /* presumably: an fwrite() failed -- confirm in ssi.c */
160 /* The SSI file reading API: */
/* Functions for reading an existing index through an SSIFILE handle.
 * NOTE(review): the int results are presumably 0 on success or one of the
 * SSI_ERR_* codes; the ret_* and *_offset pointer arguments are presumably
 * outputs -- confirm both in ssi.c.
 */
/* Open the index file named by filename; the handle is handed back through ret_sfp. */
162 extern int SSIOpen(char *filename, SSIFILE **ret_sfp);
/* Look up a key by name; reports a file handle and an offset. */
163 extern int SSIGetOffsetByName(SSIFILE *sfp, char *key, int *ret_fh,
164 SSIOFFSET *ret_offset);
/* Look up a key by number n; reports a file handle and an offset. */
165 extern int SSIGetOffsetByNumber(SSIFILE *sfp, int n, int *ret_fh,
166 SSIOFFSET *ret_offset);
/* Subsequence lookup for key starting at requested_start; reports a file
 * handle, a record offset, a data offset and the actual start position.
 */
167 extern int SSIGetSubseqOffset(SSIFILE *sfp, char *key, int requested_start,
168 int *ret_fh, SSIOFFSET *record_offset,
169 SSIOFFSET *data_offset, int *ret_actual_start);
/* Position the stream fp at the given offset. */
170 extern int SSISetFilePosition(FILE *fp, SSIOFFSET *offset);
/* Report the file name and format recorded for file handle fh. */
171 extern int SSIFileInfo(SSIFILE *sfp, int fh, char **ret_filename, int *ret_format);
/* Close an index handle opened with SSIOpen(). */
172 extern void SSIClose(SSIFILE *sfp);
174 /* The SSI index file writing API: */
/* Functions for building an index in memory (SSIINDEX) and writing it out.
 * NOTE(review): the int results are presumably 0 on success or one of the
 * SSI_ERR_* codes, and "mode" is presumably SSI_OFFSET_I32 or SSI_OFFSET_I64
 * -- confirm both in ssi.c.
 */
/* Suggest an offset mode for the given file. */
176 extern int SSIRecommendMode(char *file);
/* Create a new, empty index for the given mode. */
177 extern SSIINDEX *SSICreateIndex(int mode);
/* Record the current position of fp in ret_offset, using the given mode. */
178 extern int SSIGetFilePosition(FILE *fp, int mode, SSIOFFSET *ret_offset);
/* Register a file and its format fmt with the index; its handle comes back through ret_fh. */
179 extern int SSIAddFileToIndex(SSIINDEX *g, char *filename, int fmt, int *ret_fh);
/* Record bytes-per-line (bpl) and residues-per-line (rpl) for file fh. */
180 extern int SSISetFileForSubseq(SSIINDEX *g, int fh, int bpl, int rpl);
/* Add a primary key for file fh with its record and data offsets.
 * NOTE(review): this prototype is truncated in this listing -- the line that
 * carries its last parameter and the closing ");" is not present. Restore it
 * from the original header.
 */
181 extern int SSIAddPrimaryKeyToIndex(SSIINDEX *g, char *key, int fh,
182 SSIOFFSET *r_off, SSIOFFSET *d_off,
/* Add a secondary key that refers to primary key pkey. */
184 extern int SSIAddSecondaryKeyToIndex(SSIINDEX *g, char *key, char *pkey);
/* Write the index g to the named file. */
185 extern int SSIWriteIndex(char *file, SSIINDEX *g);
/* Release an index created with SSICreateIndex(). */
186 extern void SSIFreeIndex(SSIINDEX *g);
188 /* The SSI misc. functions API: */
/* Presumably returns a message string for error code n (one of the SSI_ERR_* values);
 * NOTE(review): who owns the returned string is not visible here -- confirm in ssi.c.
 */
190 extern char *SSIErrorString(int n);
193 #endif /*SSIH_INCLUDED*/