1 /*****************************************************************
2 * HMMER - Biological sequence analysis with profile HMMs
3 * Copyright (C) 1992-1999 Washington University School of Medicine
6 * This source code is distributed under the terms of the
7 * GNU General Public License. See the files COPYING and LICENSE
9 *****************************************************************/
15 * Database indexing (GSI format support)
16 * RCS $Id: gsi.h,v 1.1.1.1 2005/03/22 08:34:18 cmzmasek Exp $
18 * A GSI (generic sequence index) file is composed of
19 * recnum + nfiles + 1 records. Each record contains
20 * three fields: key, file number, and disk offset.
22 * [ "GSI" ] [ nfiles ] [ recnum ]
23 * Records 1..nfiles map file names to file numbers, and contain:
24 * [ filename ] [ file number, 1..nfiles ] [ 0 (unused) ]
25 * Records nfiles+1 to recnum+nfiles+1 provide disk offset
26 * and file number indices for every key:
27 * [ key ] [ file number ] [ offset ]
29 * Because the file is binary, we take some (but not
30 * complete) care to improve portability amongst platforms.
31 * This means using network order integers (see ntohl())
32 * and defining types for 16 and 32 bit integers.
34 * Because we use 32-bit offsets, ftell(), and fseek(),
35 * there is an implicit 2 GB file size maximum.
36 * AFAIK neither ANSI C nor POSIX provides a portable solution
37 * to this problem. fsetpos() and fgetpos() use an
38 * opaque fpos_t datatype that we can't write portably
39 * to a disk file. Suggestions welcome.
/* Fixed sizes of one GSI record, plus the largest values representable in
 * its 16-bit and 32-bit integer fields.
 */
41 #define GSI_KEYSIZE 32 /* keys are 32 bytes long */
42 #define GSI_RECSIZE 38 /* one record: 32-byte key + 2-byte file number + 4-byte offset */
43 #define SQD_UINT16_MAX 65535 /* 2^16-1; nfiles is held in a 16-bit field */
44 #define SQD_UINT32_MAX 4294967295U/* 2^32-1; recnum is held in a 32-bit field */
47 FILE *gsifp; /* open GSI index file */
48 sqd_uint16 nfiles; /* number of files = 16 bit int */
49 sqd_uint32 recnum; /* number of records = 32 bit int */
/* GSIFILE is the handle type returned by GSIOpen() and taken by
 * GSIGetRecord(), GSIGetOffset() and GSIClose().
 */
51 typedef struct gsi_s GSIFILE;
54 char key[GSI_KEYSIZE];
63 struct gsikey_s *elems;
/* Read-side interface: each function here returns or takes a GSIFILE handle.
 * NOTE(review): only the prototypes are visible in this header; the notes
 * below are inferred from names and parameter types and must be confirmed
 * against the implementation.
 */
/* GSIOpen: presumably opens the index file named by gsifile and returns a
 * handle for it; the failure return value is not visible here -- confirm.
 */
70 extern GSIFILE *GSIOpen(char *gsifile);
/* GSIGetRecord: presumably reads one record's three fields into f1 (key or
 * file name text), f2 (16-bit field) and f3 (32-bit field). The meaning of
 * the int return and the required size of the f1 buffer are not visible
 * here -- confirm.
 */
71 extern int GSIGetRecord(GSIFILE *gsi, char *f1, sqd_uint16 *f2, sqd_uint32 *f3);
/* GSIGetOffset: presumably looks up key and reports the sequence file name,
 * its format code and the disk offset through sqfile, fmt and ret_offset.
 * The meaning of the int return and the required size of the sqfile buffer
 * are not visible here -- confirm.
 */
72 extern int GSIGetOffset(GSIFILE *gsi, char *key, char *sqfile,
73 int *fmt, long *ret_offset);
/* GSIClose: presumably releases a handle obtained from GSIOpen -- confirm. */
74 extern void GSIClose(GSIFILE *gsi);
/* In-memory index interface: each function here returns or takes a
 * struct gsiindex_s pointer.
 * NOTE(review): only the prototypes are visible in this header; the notes
 * below are inferred from names and parameter types and must be confirmed
 * against the implementation.
 */
/* GSIAllocIndex: presumably creates an empty index; who owns the result and
 * how allocation failure is reported are not visible here -- confirm.
 */
75 extern struct gsiindex_s *GSIAllocIndex(void);
/* GSIFreeIndex: presumably releases an index obtained from GSIAllocIndex. */
76 extern void GSIFreeIndex(struct gsiindex_s *g);
/* GSIAddFileToIndex: presumably registers a sequence file name together with
 * its format code fmt in the index -- confirm.
 */
77 extern void GSIAddFileToIndex(struct gsiindex_s *g, char *filename, int fmt);
/* GSIAddKeyToIndex: presumably registers key with its file number and disk
 * offset. Whether keys longer than GSI_KEYSIZE are rejected or truncated is
 * not visible here -- confirm.
 */
78 extern void GSIAddKeyToIndex(struct gsiindex_s *g, char *key, int filenum, long offset);
/* GSISortIndex: presumably orders the stored keys; the sort order is not
 * visible here -- confirm.
 */
79 extern void GSISortIndex(struct gsiindex_s *g);
/* Writer interface: each function here takes the destination stream as fp.
 * NOTE(review): only the prototypes are visible in this header; the notes
 * below are inferred from names and parameter types and must be confirmed
 * against the implementation.
 */
/* GSIWriteIndex: presumably writes the whole index g to fp -- confirm. */
80 extern void GSIWriteIndex(FILE *fp, struct gsiindex_s *g);
/* GSIWriteHeader: presumably writes the leading record carrying the file and
 * key counts. NOTE(review): nfiles and nkeys arrive as int and long, while
 * the counts declared in this header are sqd_uint16 and sqd_uint32; check
 * that the implementation range-checks them (cf. SQD_UINT16_MAX and
 * SQD_UINT32_MAX).
 */
81 extern void GSIWriteHeader(FILE *fp, int nfiles, long nkeys);
/* GSIWriteFileRecord: presumably writes one file-name record (name, file
 * index, format code); the meaning of the int return is not visible here.
 */
82 extern int GSIWriteFileRecord(FILE *fp, char *fname, int idx, int fmt);
/* GSIWriteKeyRecord: presumably writes one key record (key, file index, disk
 * offset); the meaning of the int return is not visible here.
 */
83 extern int GSIWriteKeyRecord(FILE *fp, char *key, int fileidx, long offset);
85 #endif /*GSIH_INCLUDED*/