1 /*****************************************************************
2 * HMMER - Biological sequence analysis with profile HMMs
3 * Copyright (C) 1992-1999 Washington University School of Medicine
6 * This source code is distributed under the terms of the
7 * GNU General Public License. See the files COPYING and LICENSE
9 *****************************************************************/
11 #ifndef GSI64H_INCLUDED
12 #define GSI64H_INCLUDED
16 * Database indexing (GSI64 format support)
17 * CVS $Id: gsi64.h,v 1.1.1.1 2005/03/22 08:34:29 cmzmasek Exp $
19 * A GSI64 (generic sequence index, 64 bit hack) file is composed of
20 * recnum + nfiles + 1 records. Each record contains
21 * three fields; key, file number, and disk offset.
23 * [ "GSI64" ] [ nfiles ] [ recnum ]
24 * Records 1..nfiles map file names to file numbers, and contain:
25 * [ filename ] [ file number, 1..nfiles ] [ 0 (unused) ]
* Records nfiles+1 to recnum+nfiles provide disk offset
* and file number indices for every key:
* [ key ] [ file number ] [ offset ]
30 * Because the file is binary, we take some (but not
31 * complete) care to improve portability amongst platforms.
32 * This means using network order integers (see ntohl())
33 * and defining types for 16 and 64 bit integers.
35 * A short test program that verifies the sizes of these
36 * data types would be a good idea...
38 * Because we use 64-bit offsets, ftell64(), and fseek64(),
39 * we rely on the OS actually providing these. This is
40 * a temporary hack for human genome analysis.
/* Unsigned integer type used for the 64-bit fields of a GSI64 file
 * (record count and disk offsets).
 */
typedef unsigned long long sqd_uint64;

#define GSI64_KEYSIZE  32                       /* keys are 32 bytes long */
/* On-disk record size: key + 16-bit file number + 64-bit offset = 42 bytes.
 * Derived from GSI64_KEYSIZE so the two values cannot drift apart.
 */
#define GSI64_RECSIZE  (GSI64_KEYSIZE + 2 + 8)
#define SQD_UINT16_MAX 65535                    /* 2^16-1 */
/* The ULL suffix matches the unsigned long long typedef above; an LU
 * suffix only names unsigned long, which is 32 bits on some platforms.
 */
#define SQD_UINT64_MAX 18446744073709551615ULL  /* 2^64-1 */
50 FILE *gsifp; /* open GSI index file */
51 sqd_uint16 nfiles; /* number of files = 16 bit int */
52 sqd_uint64 recnum; /* number of records = 64 bit int */
54 typedef struct gsi64_s GSI64FILE;
57 char key[GSI64_KEYSIZE];
66 struct gsi64key_s *elems;
72 /* if ntohl() and friends are not available, you
73 * can slip replacements in by providing sre_ntohl()
74 * functions. (i.e., there is a possible portability problem here.)
/* Byte-order conversion wrappers. Each one forwards its argument to the
 * system routine of the same name (ntohl, ntohs, htonl, htons).
 *
 * The expansions deliberately carry no trailing semicolon, so the macros
 * can be used anywhere an expression is allowed: nested in another call,
 * inside a condition, or as the statement before an `else`.
 */
#define sre_ntohl(x) ntohl(x)
#define sre_ntohs(x) ntohs(x)
#define sre_htonl(x) htonl(x)
#define sre_htons(x) htons(x)
85 extern GSI64FILE *GSI64Open(char *gsifile);
86 extern int GSI64GetRecord(GSI64FILE *gsi, char *f1, sqd_uint16 *f2, sqd_uint64 *f3);
87 extern int GSI64GetOffset(GSI64FILE *gsi, char *key, char *sqfile,
88 int *fmt, long long *ret_offset);
89 extern void GSI64Close(GSI64FILE *gsi);
90 extern struct gsi64index_s *GSI64AllocIndex(void);
91 extern void GSI64FreeIndex(struct gsi64index_s *g);
92 extern void GSI64AddFileToIndex(struct gsi64index_s *g, char *filename, int fmt);
93 extern void GSI64AddKeyToIndex(struct gsi64index_s *g, char *key, int filenum, long long offset);
94 extern void GSI64SortIndex(struct gsi64index_s *g);
95 extern void GSI64WriteIndex(FILE *fp, struct gsi64index_s *g);
96 extern void GSI64WriteHeader(FILE *fp, int nfiles, long long nkeys);
97 extern int GSI64WriteFileRecord(FILE *fp, char *fname, int idx, int fmt);
98 extern int GSI64WriteKeyRecord(FILE *fp, char *key, int fileidx, long long offset);
100 #endif /* USE_GSI64 */
#endif /* GSI64H_INCLUDED */