+++ /dev/null
-/* mmgetaa.c - functions for mmap()ed access to libraries */
-
-/* copyright (c) 1999,2000 William R. Pearson */
-
-/* version 0 September, 1999 */
-
-/*
- This is one of two alternative files that can be used to
- read a database. The two files are nmgetaa.c, and mmgetaa.c
- (nxgetaa.c has been retired).
-
- nmgetlib.c and mmgetaa.c are used together. nmgetlib.c provides
- the same functions as nxgetaa.c if memory mapping is not used,
- mmgetaa.c provides the database reading functions if memory
- mapping is used. The decision to use memory mapping is made on
- a file-by-file basis.
-*/
-
-/* $Name: fa_34_26_5 $ - $Id: mmgetaa.c,v 1.41 2006/04/12 18:00:02 wrp Exp $ */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <string.h>
-#include <errno.h>
-
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/mman.h>
-#include <fcntl.h>
-
-#define MAXLINE 512
-#define EOSEQ 0
-
-#define XTERNAL
-#include "uascii.h"
-/* #include "upam.h" */
-#undef XTERNAL
-
-#ifdef SUPERFAMNUM
-extern int nsfnum; /* number of superfamily numbers */
-extern int sfnum[10]; /* superfamily number from types 0 and 5 */
-extern int nsfnum_n;
-extern int sfnum_n[10];
-static char tline[MAXLINE];
-#endif
-
-#define GCGBIN 6
-
-#ifndef MAP_FILE
-#define MAP_FILE 0
-#endif
-
-#include "defs.h"
-#include "mm_file.h"
-
-extern MM_OFF bl2_long8_cvt(int64_t);
-extern int bl2_uint4_cvt(int);
-
-
-long crck(char *, int);
-extern void src_int4_read(FILE *fd, int *val);
-extern void src_long4_read(FILE *fd, long *valp);
-extern void src_long8_read(FILE *fd, int64_t *val);
-
-/* load_mmap() loads the d_pos[] and s_pos[] arrays for rapid access */
-
-struct lmf_str *
-load_mmap(FILE *libi, /* fd for already open ".xin" file */
- char *sname, /* name of sequence database file */
- int lib_type, /* 0-Fasta, 5-vms_pir, 6-gcg_binary */
- int ldnaseq, /* 1 for DNA, 0 for protein */
- struct lmf_str *m_fd)
-{
- char format[4];
- int i, lib_aa;
- MM_OFF f_size;
- long lf_size;
- struct stat statbuf;
- int max_cnt;
- MM_OFF *d_pos_arr, *s_pos_arr;
- int mm_flag, mm64_flag;
- int *tmp_pos_arr;
-
- /* first check that the necessary indices are up-to-date */
- /* read the offsets in ".xin" file */
- if (fread(format,1,4,libi)==0) {
- fprintf(stderr," cannot read .xin format\n");
- return NULL;
- }
-
- mm64_flag = (format[2]==1); /* 4 bytes or 8 bytes for long? */
-
-#ifndef BIG_LIB64
- if (mm64_flag) {return NULL;}
-#endif
-
- if (format[3]!=lib_type) {
- fprintf(stderr," cannot read format %d != lib_type %d\n",
- format[3],lib_type);
- return NULL;
- }
-
- src_int4_read(libi,&lib_aa);
- if (lib_aa == ldnaseq) { /* database residue mismatch */
- fprintf(stderr," residue type mismatch %s != %s (.xin) in %s\n",
- (lib_aa ? "DNA" : "prot."),(ldnaseq ? "prot." : "DNA"),
- sname);
- return NULL;
- }
-
- /* everything looks good, allocate an lmf_str */
-
- m_fd->lib_aa = lib_aa;
-
- /* get get file size from index */
- if (mm64_flag) src_long8_read(libi,&f_size);
- else {
- src_long4_read(libi,&lf_size);
- f_size = lf_size;
- }
-
- /* now, start to open mmap()ed file */
- mm_flag=((m_fd->mmap_fd=open(sname,O_RDONLY))>=0);
- if (!mm_flag) {
- fprintf(stderr," cannot open %s for mmap()", sname);
- perror("...");
- return NULL; /* file did not open */
- }
-
- /* fstat the library file and get size */
- if(fstat(m_fd->mmap_fd, &statbuf) < 0) {
- fprintf(stderr," cannot stat %s for mmap()", sname);
- perror("...");
- m_fd->mm_flg = 0;
- goto finish;
- }
-
- /* check for identical sizes - if different, do not mmap */
- if (f_size != statbuf.st_size) {
- fprintf(stderr," %s file size (%lld) and expected size (%ld) don't match\n",
- sname,statbuf.st_size,f_size);
- mm_flag = 0;
- goto finish;
- }
-
- /* the index file and library file are open and the sizes match */
- /* allocate the m_file struct and map the file */
-
- m_fd->st_size = statbuf.st_size;
- if((m_fd->mmap_base =
- mmap(NULL, m_fd->st_size, PROT_READ,
- MAP_FILE | MAP_SHARED, m_fd->mmap_fd, 0)) == (char *) -1) {
- mm_flag = 0;
-#ifdef DEBUG
- fprintf(stderr," cannot mmap %s", sname);
- perror("...");
-#endif
- }
- finish:
- close(m_fd->mmap_fd);
- if (!mm_flag) { return NULL; }
-
- /* now finish reading the index file */
- src_int4_read(libi,&max_cnt);
-
- if (mm64_flag) {
- src_long8_read(libi,&m_fd->tot_len);
- }
- else {
- src_long4_read(libi,&lf_size);
- m_fd->tot_len = lf_size;
- }
- src_long4_read(libi,&lf_size);
- m_fd->max_len = lf_size;
-
-#ifdef DEBUG
- fprintf(stderr,
- "%s\tformat: %c%c%d %d; max_cnt: %d; tot_len: %lld max_len: %ld\n",
- sname,format[0],format[1],format[2],format[3],
- max_cnt,m_fd->tot_len,m_fd->max_len);
-#endif
-
- /* allocate array of description pointers */
- if (!mm64_flag) {
- if ((tmp_pos_arr=(int *)calloc(max_cnt+1,sizeof(int)))==NULL) {
- fprintf(stderr," cannot allocate %d for tmp_pos array\n",
- max_cnt+1);
- }
- }
-
- if ((d_pos_arr=(MM_OFF *)calloc(max_cnt+1, sizeof(MM_OFF)))==NULL) {
- fprintf(stderr," cannot allocate %d for desc. array\n",max_cnt+1);
- exit(1);
- }
-
- /* read m_fd->d_pos[max_cnt+1] */
- if (mm64_flag) {
- if (fread(d_pos_arr,sizeof(MM_OFF),max_cnt+1,libi)!=
- max_cnt+1) {
- fprintf(stderr," error reading desc. offsets: %s\n",sname);
- return NULL;
- }
- }
- else {
- if (fread(tmp_pos_arr,sizeof(int),max_cnt+1,libi)!=
- max_cnt+1) {
- fprintf(stderr," error reading desc. offsets: %s\n",sname);
- return NULL;
- }
-#ifdef DEBUG
- fprintf(stderr,"d_pos_crc: %ld\n",
- crck((char *)tmp_pos_arr,sizeof(int)*(max_cnt+1)));
-#endif
- }
-
-
-#ifndef IS_BIG_ENDIAN
- if (mm64_flag)
- for (i=0; i<=max_cnt; i++) {
- d_pos_arr[i] = bl2_long8_cvt(d_pos_arr[i]);
- }
- else
- for (i=0; i<=max_cnt; i++) {
- d_pos_arr[i] = bl2_uint4_cvt(tmp_pos_arr[i]);
- }
-#else
- if (!mm64_flag) {
- for (i=0; i<=max_cnt; i++) {
- d_pos_arr[i] = tmp_pos_arr[i];
- }
- }
-#endif
-
-#ifdef DEBUG
- for (i=0; i<max_cnt-1; i++) {
- if (d_pos_arr[i+1] <= d_pos_arr[i] )
- fprintf(stderr," ** dpos_error [%d]\t%ld\t%ld\n",
- i,d_pos_arr[i],d_pos_arr[i+1]);
- }
-#endif
-
- /* allocate array of sequence pointers */
- if ((s_pos_arr=(MM_OFF *)calloc(max_cnt+1,sizeof(MM_OFF)))==NULL) {
- fprintf(stderr," cannot allocate %d for seq. array\n",max_cnt+1);
- exit(1);
- }
-
- /* read m_fd->s_pos[max_cnt+1] */
- if (mm64_flag) {
- if (fread(s_pos_arr,sizeof(long),max_cnt+1,libi)!=
- max_cnt+1) {
- fprintf(stderr," error reading seq offsets: %s\n",sname);
- return NULL;
- }
- }
- else {
- if (fread(tmp_pos_arr,sizeof(int),max_cnt+1,libi)!=
- max_cnt+1) {
- fprintf(stderr," error reading seq offsets: %s\n",sname);
- return NULL;
- }
-#ifdef DEBUG
- fprintf(stderr,"s_pos_crc: %ld\n",
- crck((char *)tmp_pos_arr,sizeof(int)*(max_cnt+1)));
-#endif
- }
-
-#ifndef IS_BIG_ENDIAN
- if (mm64_flag)
- for (i=0; i<=max_cnt; i++)
- s_pos_arr[i] = bl2_long8_cvt(s_pos_arr[i]);
- else
- for (i=0; i<=max_cnt; i++)
- s_pos_arr[i] = (long)bl2_uint4_cvt(tmp_pos_arr[i]);
-#else
- if (!mm64_flag)
- for (i=0; i<=max_cnt; i++)
- s_pos_arr[i] = (long)tmp_pos_arr[i];
-#endif
-
-#ifdef DEBUG
- for (i=1; i<max_cnt-1; i++) {
- if (s_pos_arr[i+1]<s_pos_arr[i])
- fprintf(stderr," ** spos_error [%d]\t%ld\t%ld\n",
- i,s_pos_arr[i],s_pos_arr[i]);
- }
-#endif
-
- if (!mm64_flag) free(tmp_pos_arr);
-
- m_fd->max_cnt = max_cnt;
- m_fd->d_pos_arr = d_pos_arr;
- m_fd->s_pos_arr = s_pos_arr;
- m_fd->lpos = 0;
-
- /* check_mmap(m_fd,-2); */
-
- return m_fd;
-}
-
-char *mgets (char *s, int n, struct lmf_str *m_fd)
-{
- char *cs, *mfp;
-
- mfp = m_fd->mmap_addr;
- cs = s;
-
- while (--n > 0 && (*mfp != (char)EOF))
- if ((*cs++ = *mfp++) == '\n') break;
- *cs = '\0';
-
- m_fd->mmap_addr = mfp;
- return (*mfp == (char)EOF && cs == s) ? NULL : s;
-}
-
-int
-agetlibm(unsigned char *seq,
- int maxs,
- char *libstr,
- int n_libstr,
- fseek_t *libpos,
- int *lcont,
- struct lmf_str *m_fd,
- long *l_off)
-{
- register unsigned char *cp, *seqp;
- register int *ap;
- char *desc;
- int lpos; /* entry number in library */
- long l;
- unsigned char *seqm, *seqm1;
- char *bp;
- static long seq_len;
- static unsigned char *cp_max;
-#ifdef SUPERFAMNUM
- char *bp1, *bpa, *tp;
- int i;
-#endif
-
- *l_off = 1;
-
- lpos = m_fd->lpos;
-
- seqp = seq;
- seqm = &seq[maxs-9];
- seqm1 = seqm-1;
-
- ap = m_fd->sascii;
-
- if (*lcont==0) {
- if (lpos >= m_fd->max_cnt) return (-1);
- seq_len = m_fd->d_pos_arr[lpos+1] - m_fd->s_pos_arr[lpos];
- if (seq_len < 0 || (seq_len > m_fd->max_len && seq_len > (m_fd->max_len*5)/4)) {
- fprintf(stderr," ** sequence over-run: %ld at %d\n",seq_len,lpos);
- return(-1);
- }
- *libpos = (fseek_t)lpos;
-
- desc = m_fd->mmap_base+m_fd->d_pos_arr[lpos]+1;
- strncpy(libstr,desc,n_libstr-1);
- libstr[n_libstr-1]='\0';
- if ((bp=strchr(libstr,'\r'))!=NULL) *bp='\0';
- if ((bp=strchr(libstr,'\n'))!=NULL) *bp='\0';
- if (n_libstr > MAX_UID) {
- bp = libstr;
- while (*bp++) if ( *bp=='\001' || *bp=='\t') *bp=' ';
- }
-
- for (bp = desc; *bp && (*bp != '\n'); *bp++ )
- if (*bp == '@' && !strncmp(bp+1,"C:",2)) sscanf(bp+3,"%ld",l_off);
-
-#ifdef SUPERFAMNUM
- sfnum[0]=nsfnum=0;
- strncpy(tline,desc,sizeof(tline));
- tline[MAXLINE-1]='\0';
- if ((bp=strchr(tline,'\n'))!=NULL) *bp='\0';
- if ((bp=strchr(tline,' ')) && (bp=strchr(bp+1,SFCHAR))) {
- if ((bpa = strchr(bp+1,'\001'))!=NULL) *bpa = '\0';
- if ((bp1=strchr(bp+1,SFCHAR))==NULL) {
- fprintf(stderr," second %c missing: %s\n",SFCHAR,tline);
- }
- else {
- *bp1 = '\0';
- i = 0;
- if ((tp = strtok(bp+1," \t"))!=NULL) {
- sfnum[i++] = atoi(tp);
- while ((tp = strtok((char *)NULL," \t")) != (char *)NULL) {
- sfnum[i++] = atoi(tp);
- if (i>=9) break;
- }
- }
- sfnum[nsfnum=i]= 0;
- if (nsfnum>1) sf_sort(sfnum,nsfnum);
- else {
- if (nsfnum<1) fprintf(stderr," found | but no sfnum: %s\n",libstr);
- }
- }
- }
- else {
- sfnum[0] = nsfnum = 0;
- }
-#endif
-
- m_fd->mmap_addr = m_fd->mmap_base+m_fd->s_pos_arr[lpos];
- cp_max = (unsigned char *)(m_fd->mmap_addr+seq_len);
- }
-
- for (cp=(unsigned char *)m_fd->mmap_addr; seqp<seqm1; ) {
- if ((*seqp++=ap[*cp++])<NA &&
- (*seqp++=ap[*cp++])<NA &&
- (*seqp++=ap[*cp++])<NA &&
- (*seqp++=ap[*cp++])<NA &&
- (*seqp++=ap[*cp++])<NA &&
- (*seqp++=ap[*cp++])<NA &&
- (*seqp++=ap[*cp++])<NA &&
- (*seqp++=ap[*cp++])<NA &&
- (*seqp++=ap[*cp++])<NA &&
- (*seqp++=ap[*cp++])<NA) continue;
- --seqp;
- if (cp >= cp_max) break;
- }
- m_fd->mmap_addr = (char *)cp;
-
- if (seqp>=seqm1) (*lcont)++;
- else {
- *lcont=0;
- lpos++;
- m_fd->lpos = lpos;
- }
- *seqp = EOSEQ;
- /* if ((int)(seqp-seq)==0) return 1; */
- return (int)(seqp-seq);
-}
-
-void
-aranlibm(char *str,
- int cnt,
- fseek_t libpos,
- char *libstr,
- struct lmf_str *m_fd)
-{
- char *bp;
- int llen;
- int lpos;
-
- lpos = (int) libpos;
-
- llen = m_fd->s_pos_arr[lpos]-m_fd->d_pos_arr[lpos];
- if (llen >= cnt) llen = cnt-1;
-
- strncpy(str,m_fd->mmap_base+m_fd->d_pos_arr[lpos]+1,llen);
- str[llen]='\0';
- if ((bp = strchr(str,'\r'))!=NULL) *bp='\0';
- if ((bp = strchr(str,'\n'))!=NULL) *bp='\0';
- bp = str;
- while (*bp++) if ( *bp=='\001' || *bp=='\t') *bp=' ';
- m_fd->lpos = lpos;
-}
-
-/* there is no vgetlibm() because vgetlibm() and agetlibm() are
- identical - the difference in the two file formats relates to the
- location of the sequence, which is already available in spos_arr[].
-
- however vranlibm must accomodate both type 5 and 6 files;
- type 6 has extra stuff after the seq_id.
-*/
-
-void
-vranlibm(char *str,
- int cnt,
- fseek_t libpos,
- char *libstr,
- struct lmf_str *m_fd)
-{
- char *bp, *mp;
- int llen;
- int lpos;
-
- lpos = (int)libpos;
-
- llen = m_fd->s_pos_arr[lpos]-m_fd->d_pos_arr[lpos];
-
- mp = m_fd->mmap_base+m_fd->d_pos_arr[lpos];
-
- strncpy(str,mp+4,20);
- str[20]='\0';
- if ((bp=strchr(str,' '))!=NULL) *(bp+1) = '\0';
- else if ((bp=strchr(str,'\n'))!=NULL) *bp = ' ';
- bp = strchr(mp,'\n');
-
- llen -= (bp-mp)-5;
- if (llen > cnt-strlen(str)) llen = cnt-strlen(str)-1;
-
- strncat(str,bp+1,llen);
- if ((bp = strchr(str,'\n'))!=NULL) *bp='\0';
- str[cnt-1]='\0';
- m_fd->lpos = lpos;
-}
-
-void
-close_mmap(struct lmf_str *m_fd) {
- free(m_fd->s_pos_arr);
- free(m_fd->d_pos_arr);
- if (m_fd->mm_flg) {
- munmap(m_fd->mmap_base,m_fd->st_size);
- free(m_fd);
- }
- m_fd->mm_flg=0;
-}
-
-#ifndef min
-#define min(x,y) ((x) > (y) ? (y) : (x))
-#endif
-
-static int gcg_bton[4]={2,4,1,3};
-
-int
-gcg_getlibm(unsigned char *seq,
- int maxs,
- char *libstr,
- int n_libstr,
- fseek_t *libpos,
- int *lcont,
- struct lmf_str *m_fd,
- long *l_off)
-{
- char dummy[20];
- char gcg_date[6];
- char gcg_type[10];
- register unsigned char *cp, *seqp, stmp;
- register int *ap, lpos;
- unsigned char *seqm, *seqm1;
- long r_block, b_block, r_fact, r16_block;
-
- *l_off = 1;
-
- seqp = seq;
- seqm = &seq[maxs-9];
- seqm1 = seqm-1;
-
- ap = m_fd->sascii;
- lpos = m_fd->lpos;
-
- if (*lcont==0) {
- if (lpos >= m_fd->max_cnt) return (-1);
- sscanf(m_fd->mmap_base+m_fd->d_pos_arr[lpos]+4,"%s %s %s %s %ld\n",
- libstr,gcg_date,gcg_type,dummy,&(m_fd->gcg_len));
-
- m_fd->gcg_binary = (gcg_type[0]=='2');
-
- libstr[12]='\0';
- *libpos = lpos;
- m_fd->mmap_addr = m_fd->mmap_base+m_fd->s_pos_arr[lpos];
- }
-
- r_block = b_block = min((size_t)(seqm-seqp),m_fd->gcg_len);
- if (m_fd->gcg_binary) {
- r_block = (r_block+3)/4;
- }
-
- cp=(unsigned char *)m_fd->mmap_addr;
- if (!m_fd->gcg_binary) {
- r_fact = 1;
- r16_block = r_block/16;
- while (r16_block-- > 0) {
- *seqp++ = ap[*cp++];
- *seqp++ = ap[*cp++];
- *seqp++ = ap[*cp++];
- *seqp++ = ap[*cp++];
- *seqp++ = ap[*cp++];
- *seqp++ = ap[*cp++];
- *seqp++ = ap[*cp++];
- *seqp++ = ap[*cp++];
- *seqp++ = ap[*cp++];
- *seqp++ = ap[*cp++];
- *seqp++ = ap[*cp++];
- *seqp++ = ap[*cp++];
- *seqp++ = ap[*cp++];
- *seqp++ = ap[*cp++];
- *seqp++ = ap[*cp++];
- *seqp++ = ap[*cp++];
- }
- while (seqp<seq+r_block) *seqp++ = ap[*cp++];
- }
- else if (m_fd->gcg_binary) {
- r_fact = 4;
- r16_block = r_block/8;
- while(r16_block-- > 0) {
- stmp = *cp++;
- *seqp++ = gcg_bton[(stmp>>6) &3];
- *seqp++ = gcg_bton[(stmp>>4) &3];
- *seqp++ = gcg_bton[(stmp>>2) &3];
- *seqp++ = gcg_bton[(stmp) &3];
- stmp = *cp++;
- *seqp++ = gcg_bton[(stmp>>6) &3];
- *seqp++ = gcg_bton[(stmp>>4) &3];
- *seqp++ = gcg_bton[(stmp>>2) &3];
- *seqp++ = gcg_bton[(stmp) &3];
- stmp = *cp++;
- *seqp++ = gcg_bton[(stmp>>6) &3];
- *seqp++ = gcg_bton[(stmp>>4) &3];
- *seqp++ = gcg_bton[(stmp>>2) &3];
- *seqp++ = gcg_bton[(stmp) &3];
- stmp = *cp++;
- *seqp++ = gcg_bton[(stmp>>6) &3];
- *seqp++ = gcg_bton[(stmp>>4) &3];
- *seqp++ = gcg_bton[(stmp>>2) &3];
- *seqp++ = gcg_bton[(stmp) &3];
- stmp = *cp++;
- *seqp++ = gcg_bton[(stmp>>6) &3];
- *seqp++ = gcg_bton[(stmp>>4) &3];
- *seqp++ = gcg_bton[(stmp>>2) &3];
- *seqp++ = gcg_bton[(stmp) &3];
- stmp = *cp++;
- *seqp++ = gcg_bton[(stmp>>6) &3];
- *seqp++ = gcg_bton[(stmp>>4) &3];
- *seqp++ = gcg_bton[(stmp>>2) &3];
- *seqp++ = gcg_bton[(stmp) &3];
- stmp = *cp++;
- *seqp++ = gcg_bton[(stmp>>6) &3];
- *seqp++ = gcg_bton[(stmp>>4) &3];
- *seqp++ = gcg_bton[(stmp>>2) &3];
- *seqp++ = gcg_bton[(stmp) &3];
- stmp = *cp++;
- *seqp++ = gcg_bton[(stmp>>6) &3];
- *seqp++ = gcg_bton[(stmp>>4) &3];
- *seqp++ = gcg_bton[(stmp>>2) &3];
- *seqp++ = gcg_bton[(stmp) &3];
- }
-
- while (seqp < seq+4*r_block) {
- stmp = *cp++;
- *seqp++ = gcg_bton[(stmp>>6) &3];
- *seqp++ = gcg_bton[(stmp>>4) &3];
- *seqp++ = gcg_bton[(stmp>>2) &3];
- *seqp++ = gcg_bton[(stmp) &3];
- }
- }
- if (r_fact * r_block >= m_fd->gcg_len) {
- *lcont = 0;
- m_fd->lpos++;
- }
- else {
- if (m_fd->gcg_binary) b_block = 4*r_block;
- m_fd->gcg_len -= b_block;
- (*lcont)++;
- }
-
- seq[b_block] = EOSEQ;
- /* if (b_block==0) return 1; else */
- return b_block;
-}
-
-void lget_ann_m(struct lmf_str *lm_fd, char *libstr, int n_libstr);
-
-int
-lgetlibm(unsigned char *seq,
- int maxs,
- char *libstr,
- int n_libstr,
- fseek_t *libpos,
- int *lcont,
- struct lmf_str *m_fd,
- long *l_off)
-{
- register unsigned char *cp, *seqp;
- register int *ap, lpos;
- unsigned char *seqm, *seqm1;
-
- *l_off = 1;
-
- seqp = seq;
- seqm = &seq[maxs-11];
- seqm1 = seqm-1;
-
- lpos = m_fd->lpos;
- ap = m_fd->sascii;
-
- if (*lcont==0) {
- if (lpos >= m_fd->max_cnt) return (-1);
-
- if (n_libstr <= 21) {
- strncpy(libstr,m_fd->mmap_base+m_fd->d_pos_arr[lpos]+12,12);
- libstr[12]='\0';
- }
- else {
- lget_ann_m(m_fd,libstr,n_libstr);
- }
- *libpos = lpos;
-
- m_fd->mmap_addr = m_fd->mmap_base+m_fd->s_pos_arr[lpos];
- cp = (unsigned char *)m_fd->mmap_addr;
- }
- else cp = (unsigned char *)m_fd->mmap_addr;
-
- while (seqp<seqm1) {
- if (*cp=='/' && *(cp-1)=='\n') break;
- if ((*seqp++=ap[*cp++])<NA &&
- (*seqp++=ap[*cp++])<NA &&
- (*seqp++=ap[*cp++])<NA &&
- (*seqp++=ap[*cp++])<NA &&
- (*seqp++=ap[*cp++])<NA &&
- (*seqp++=ap[*cp++])<NA &&
- (*seqp++=ap[*cp++])<NA &&
- (*seqp++=ap[*cp++])<NA &&
- (*seqp++=ap[*cp++])<NA &&
- (*seqp++=ap[*cp++])<NA &&
- (*seqp++=ap[*cp++])<NA) continue;
- --seqp;
- if (*cp=='\n' && *(cp+1)==' ') cp += 11;
- }
-
- if (seqp>=seqm1) {
- (*lcont)++;
- m_fd->mmap_addr = (char *)cp;
- }
- else {
- *lcont=0;
- m_fd->lpos++;
- }
-
- *seqp = EOSEQ;
- return (int)(seqp-seq);
-}
-
-void
-lget_ann_m(struct lmf_str *lm_fd, char *libstr, int n_libstr) {
- char *bp, *bp_gid, locus[120], desc[120], acc[120], ver[120];
-
- /* copy in locus from lm_fd->lline */
- strncpy(locus,&lm_fd->mmap_addr[12],sizeof(locus));
- if ((bp=strchr(locus,' '))!=NULL) *(bp+1) = '\0';
-
- /* get description */
- mgets(desc,sizeof(desc),lm_fd);
- while (desc[0]!='D' || desc[1]!='E' || strncmp(desc,"DEFINITION",10))
- mgets(desc,sizeof(desc),lm_fd);
- if ((bp = strchr(&desc[12],'\n'))!=NULL) *bp='\0';
-
- /* get accession */
- mgets(acc,sizeof(acc),lm_fd);
- while (acc[0]!='A' || acc[1]!='C' || strncmp(acc,"ACCESSION",9)) {
- mgets(acc,sizeof(acc),lm_fd);
- if (acc[0]=='O' && acc[1]=='R' && strncmp(acc,"ORIGIN",6)==0)
- break;
- }
- if ((bp = strchr(&acc[12],'\n'))!=NULL) *bp='\0';
- if ((bp = strchr(&acc[12],' '))!=NULL) *bp='\0';
-
- /* get version */
- mgets(ver,sizeof(ver),lm_fd);
- while (ver[0]!='V' || ver[1]!='E' || strncmp(ver,"VERSION",7)) {
- mgets(ver,sizeof(ver),lm_fd);
- if (ver[0]=='O' && ver[1]=='R' && strncmp(ver,"ORIGIN",6)==0)
- break;
- }
- if ((bp = strchr(&ver[12],'\n'))!=NULL) *bp='\0';
-
- /* extract gi:123456 from version line */
- bp_gid = strchr(&ver[12],':');
- if (bp_gid != NULL) {
- if ((bp=strchr(bp_gid+1,' '))!=NULL) *bp='\0';
- bp_gid++;
- }
- if ((bp = strchr(&ver[12],' '))!=NULL) *bp='\0';
-
- /* build up FASTA header line */
- if (bp_gid != NULL) {
- strncpy(libstr,"gi|",n_libstr-1);
- strncat(libstr,bp_gid,n_libstr-4);
- strncat(libstr,"|gb|",n_libstr-20);
- }
- else {libstr[0]='\0';}
-
- /* if we have a version number, use it, otherwise accession,
- otherwise locus/description */
-
- if (ver[0]=='V') {
- strncat(libstr,&ver[12],n_libstr-1-strlen(libstr));
- strncat(libstr,"|",n_libstr-1-strlen(libstr));
- }
- else if (acc[0]=='A') {
- strncat(libstr,&acc[12],n_libstr-1-strlen(libstr));
- strncat(libstr," ",n_libstr-1-strlen(libstr));
- }
-
- strncat(libstr,locus,n_libstr-1-strlen(libstr));
- strncat(libstr,&desc[11],n_libstr-1-strlen(libstr));
- libstr[n_libstr-1]='\0';
-}
-
-void
-lranlibm(char *str,
- int cnt,
- fseek_t seek,
- char *libstr,
- struct lmf_str *m_fd)
-{
- char *bp, *llp;
- char acc[MAXLINE], desc[MAXLINE];
-
- llp = m_fd->mmap_addr = m_fd->mmap_base + m_fd->d_pos_arr[seek];
-
- lget_ann_m(m_fd,str,cnt);
-
- str[cnt-1]='\0';
-
- m_fd->lpos = seek;
-}
-
-static int check_status=0;
-
-void
-check_mmap(struct lmf_str *m_fd,long ntt) {
-
- int i, seq_len, ok_stat;
-
- ok_stat = 1;
- if ( ++check_status > 5) return;
-
- fprintf(stderr," ** checking %s %ld**\n", m_fd->lb_name,ntt);
- for (i=0; i<m_fd->max_cnt; i++) {
- seq_len = m_fd->d_pos_arr[i+1] - m_fd->s_pos_arr[i];
- if (seq_len < 0 || (seq_len > m_fd->max_len && seq_len > (m_fd->max_len*5)/4)) {
- fprintf(stderr,"%d:\t%ld\t%ld\t%ld\n",
- i,m_fd->d_pos_arr[i],m_fd->s_pos_arr[i],
- m_fd->d_pos_arr[i+1]-m_fd->s_pos_arr[i]);
- ok_stat=0;
- }
- }
- if (ok_stat) {
- if (check_status) fprintf(stderr," ** check_mmap OK %s %ld**\n",
- m_fd->lb_name,ntt);
- }
-}
-
-#ifdef DEBUG
-/* C H K 3 -- Compute a type-3 Kermit block check. */
-/*
- Calculate the 16-bit CRC of a null-terminated string using a byte-oriented
- tableless algorithm invented by Andy Lowry (Columbia University). The
- magic number 010201 is derived from the CRC-CCITT polynomial x^16+x^12+x^5+1.
- Note - this function could be adapted for strings containing imbedded 0's
- by including a length argument.
-*/
-long
-crck(s,n)
- char *s; int n;
-{
- unsigned int c, q;
- long crc = 0;
-
- while (n-->0) {
- c = *s++;
- /* if (parity)*/
- c &= 0177;
- q = (crc ^ c) & 017; /* Low-order nibble */
- crc = (crc >> 4) ^ (q * 010201);
- q = (crc ^ (c >> 4)) & 017; /* High order nibble */
- crc = (crc >> 4) ^ (q * 010201);
- }
- return(crc);
-}
-#endif