1 /* copyright (c) 1996, 1997, 1998, 1999 William R. Pearson and the
4 /* $Name: fa_34_26_5 $ - $Id: llgetaa.c,v 1.25 2007/01/08 15:38:46 wrp Exp $ */
7 Feb, 1998 - version for prss
9 March, 2001 - modifications to support comp_thr.c: use libpos to indicate
10 whether the score is shuffled==1 or unshuffled==0. This simplifies
11 complib.c and makes comp_thr.c possible
13 modified version of nxgetaa.c that generates random sequences
/* Smaller of x and y.  This is a function-like macro, so an argument may be
   evaluated more than once; do not pass expressions with side effects. */
36 #define min(x,y) ((x) > (y) ? (y) : (x))
39 int nsfnum; /* number of superfamily numbers */
40 int sfnum[10]; /* superfamily number from types 0 and 5 */
/* use_stdin is tested against 1 in gettitle and against 2 in the description
   lookup further down; where it is assigned is not visible in this view. */
44 static int use_stdin=0;
/* llibstr0: copy of the header line text after its first character (getseq). */
45 static char llibstr0[256];
/* llibstr1: copy of o_line taken in getseq. */
46 static char llibstr1[256];
/* o_line: copy of the line most recently read by getseq. */
47 static char o_line[256];
50 #define FASTA_FORMAT 1
/* seq_format is switched to FASTA_FORMAT or GCG_FORMAT by getseq;
   NO_FORMAT and GCG_FORMAT are defined on lines not shown here. */
52 static int seq_format=NO_FORMAT;
/* seq_title: copy of the header line text after its first character (getseq). */
53 static char seq_title[200];
/* Randomisation helpers defined in another translation unit.  openlib calls
   irand(0) to initialise the generator; agetlib calls shuffle or wshuffle to
   produce a shuffled copy of the saved sequence. */
55 extern int irand(int);
56 extern void shuffle(unsigned char *from, unsigned char *to, int n);
57 extern void wshuffle(unsigned char *from, unsigned char *to, int n, int wsiz, int *ieven);
/* getseq: read one sequence from the file named by filen into seq.
 *
 *   filen    file name; a ':' in the name introduces a residue range
 *   qascii   table mapping input characters to residue codes; the entry for
 *            '*' is overwritten with the entry for 'X'
 *   seq      output buffer for the encoded residues
 *   maxs     maximum number of residues stored in seq
 *   libstr   output buffer for the description, n_libstr bytes
 *   sq0off   in/out sequence offset; set to the range start in some cases
 *
 * Returns 0 when the input ends while a GCG header is being read.  The normal
 * return statement is not visible in this view; n holds the residue count
 * just before the end of the visible code.
 *
 * NOTE(review): only part of this function is visible.  The comments below
 * describe the statements shown and do not claim anything about the gaps.
 */
60 getseq(char *filen, int *qascii,
61 unsigned char *seq, int maxs, char *libstr,
62 int n_libstr, long *sq0off)
/* NOTE(review): sstart and sstop have no initialisers on this line; confirm
   they are assigned before the range checks near the end of the function. */
68 int sstart, sstop, sset=0;
70 int desc_complete = 0;
/* Look for a ':' that introduces a residue range after the file name. */
77 if ((bp=strchr(filen,':'))!=NULL) {
/* The second search starts at filen+3; presumably this skips a leading
   drive-letter style colon - TODO confirm against the hidden lines. */
79 if ((bp=strchr(filen+3,':'))!=NULL) {
/* ":-stop" supplies only the end of the range, ":start-stop" both ends. */
82 if (*(bp+1)=='-') sscanf(bp+2,"%d",&sstop);
83 else sscanf(bp+1,"%d-%d",&sstart,&sstop);
/* The names "-" and "@" are not passed to fopen. */
87 if (strcmp(filen,"-") && strcmp(filen,"@")) {
88 if ((fptr=fopen(filen,"r"))==NULL) {
89 fprintf(stderr," could not open %s\n",filen);
/* Cut o_line at the first control-A (\001) and reuse it as the description. */
100 if ((bp=strchr(o_line,'\001'))!=NULL) *bp='\0';
/* NOTE(review): strncpy does not add a terminator when the source fills the
   destination; no explicit termination of llibstr1 is visible here. */
101 strncpy(llibstr1,o_line,sizeof(llibstr1));
102 strncpy(libstr,o_line,n_libstr);
103 libstr[n_libstr-1]='\0';
/* Writes ':' over the terminator of filen; presumably this restores a colon
   that was replaced by a terminator on a line not shown (gettitle uses the
   same pattern). */
108 filen[strlen(filen)]=':';
109 if (*sq0off==1 || sstart>1) *sq0off = sstart;
/* Read the file line by line, keeping a copy of each line in o_line. */
114 while(fgets(line,sizeof(line),fptr)!=NULL) {
117 strncpy(o_line,line,sizeof(o_line));
121 seq_format = FASTA_FORMAT;
/* Give '*' the same code as 'X'. */
123 qascii['*'] = qascii['X'];
/* Reset the superfamily information. */
125 sfnum[0] = nsfnum = 0;
127 if ((bp=(char *)strchr(line,'\n'))!=NULL) {
128 *bp='\0'; /* have newline */
/* Keep the header text that follows its first character, cut at control-A. */
132 if ((bp=strchr(line+1,'\001'))!=NULL) *bp='\0';
133 strncpy(seq_title,line+1,sizeof(seq_title));
134 strncpy(llibstr0,line+1,sizeof(llibstr0));
/* For a short libstr buffer keep only the text before the first space. */
135 if (n_libstr <= 20) {
136 if ((bp=(char *)strchr(line,' '))!=NULL) *bp='\0';
138 strncpy(libstr,line+1,n_libstr);
139 libstr[n_libstr-1]='\0';
/* When the header did not fit in one read, keep reading until a chunk that
   contains the newline is found. */
141 if (!desc_complete) {
142 while (fgets(line, sizeof(line), fptr) != NULL) {
143 if (strchr(line,'\n') != NULL) {
/* No format chosen yet: treat the input as GCG format. */
151 else if (seq_format==NO_FORMAT) {
152 seq_format = GCG_FORMAT;
153 qascii['*'] = qascii['X'];
/* Advance to the line that ends with ".." followed by a newline. */
156 while (strncmp(&line[llen-3],"..\n",(size_t)3) != 0) {
157 if (fgets(line,sizeof(line),fptr)==NULL) return 0;
160 if (n_libstr <= 20) {
161 if ((bp=(char *)strchr(line,' '))!=NULL) *bp='\0';
162 else if ((bp=(char *)strchr(line,'\n'))!=NULL) *bp='\0';
164 strncpy(libstr,line,n_libstr);
165 libstr[n_libstr-1]='\0';
166 if (fgets(line,sizeof(line),fptr)==NULL) return 0;
/* In GCG format, skip lines shorter than l_offset. */
169 if (seq_format==GCG_FORMAT && strlen(line)<l_offset) continue;
/* Lines that do not start with '>' or ';' hold residues.  Conversion runs
   from column l_offset until maxs residues are stored or a code that is not
   below EL is met; only codes below NA are stored. */
171 if (line[0]!='>'&& line[0]!=';') {
172 for (i=l_offset; (n<maxs)&&
173 ((ic=qascii[line[i]&AAMASK])<EL); i++)
174 if (ic<NA) seq[n++]= ic;
179 strncpy(o_line,line,sizeof(o_line));
190 fprintf(stderr," sequence may be truncated %d %d\n",n,maxs);
/* Remove a trailing newline or carriage return from the description. */
193 if ((bp=strchr(libstr,'\n'))!=NULL) *bp = '\0';
194 if ((bp=strchr(libstr,'\r'))!=NULL) *bp = '\0';
197 if (fptr!=stdin) fclose(fptr);
/* Default the range to the whole sequence: positions start at 1. */
200 if (sstart <= 0) sstart = 1;
201 if (sstop <= 0) sstop = n;
/* Walk the requested range; the loop body is not visible in this view. */
204 for (i=0, j=sstart; j<=sstop; i++,j++)
/* Length of the selected range. */
206 n = sstop - sstart +1;
/* gettitle: copy the title of the sequence in filen into title.
 *
 *   filen   file name, optionally followed by ':' and further text
 *   title   output buffer
 *   len     number of bytes passed to strncpy for title
 *
 * Both visible return statements return strlen(title).  The return taken
 * when the file cannot be opened is not visible in this view.
 *
 * NOTE(review): strncpy does not terminate title when the source has len or
 * more characters; no explicit termination is visible in this view.
 */
214 gettitle(filen,title,len)
215 char *filen, *title; int len;
/* Titles saved earlier by getseq are reused instead of reading the file; the
   condition that selects llibstr1 rather than llibstr0 is not visible. */
227 if (use_stdin == 1) {
229 strncpy(title,llibstr0,len);
232 strncpy(title,llibstr1,len);
/* Cut the title at the first control-A (\001). */
234 if ((bp=strchr(title,'\001'))!=NULL) *bp='\0';
235 return strlen(title);
/* Temporarily end filen at the ':' so that only the part before it is opened. */
238 if ((bp=strchr(filen,':'))!=NULL) { *bp='\0'; sset=1;}
240 if ((fptr=fopen(filen,"r"))==NULL) {
241 fprintf(stderr," file %s was not found\n",filen);
/* Put the ':' back where the terminator was written. */
246 if (sset==1) filen[strlen(filen)]=':';
/* Scan for the first line that starts with '>' or ';'. */
248 while(fgets(line,sizeof(line),fptr)!=0) {
249 if (line[0]=='>'|| line[0]==';') goto found;
256 if ((bp=strchr(line,'\001'))!=NULL) *bp = 0;
/* Two ways of locating the end of the line follow; presumably a preprocessor
   conditional on lines not shown selects one of them - TODO confirm. */
258 bp = strpbrk(line,"\n\r");
260 bp = strchr(line,'\n');
262 if (bp!=NULL) *bp = 0;
263 strncpy(title,line,len);
266 return strlen(title);
273 int lfflag=0; /* flag for CRLF in EMBL CDROM files */
/* '\015' is octal for ASCII 13, the carriage-return character. */
274 #define LFCHAR '\015' /* for MWC 5.5 */
/* Old-style declarations without parameter lists; openlib stores both
   functions in the getlib and ranlib members of the lmf_str it allocates. */
276 int agetlib(); void aranlib(); /* pearson fasta format */
278 /* the following is from fgetgb.c */
280 /* a file name for openlib may now include a library type suffix */
281 /* only opens fasta format files */
/* libn_save: library name copied by openlib and later passed to getseq by
   agetlib. */
283 static char libn_save[MAX_FN];
284 static int ldna_save=0;
285 static int do_shuffle;
/* shuff_cnt: remaining shuffles; raised by set_shuffle when m_msg.shuff_max
   is larger, decremented by agetlib, which returns -1 once it is used up. */
286 static int shuff_cnt=10;
/* w_flag: when greater than zero, agetlib calls wshuffle with this value as
   the window size instead of calling shuffle. */
287 static int w_flag = 0;
/* dfile: output file opened by set_shuffle; agetlib writes the shuffled
   sequences to it. */
289 static FILE *dfile=NULL;
/* aa_save: copy of the sequence read by agetlib, used as the shuffle source. */
291 static unsigned char *aa_save;
295 /* lmf_str * is used here for compatibility with the "normal" openlib,
296 but is largely unnecessary */
/* set_shuffle: copy the shuffle settings in m_msg into the file-scope state.
 *
 *   m_msg.shuff_wid  when greater than zero, becomes the window size w_flag
 *   m_msg.shuff_max  raises shuff_cnt when it is larger than the current value
 *   m_msg.dfile      when not empty, "<dfile>_rlib" is opened for writing and
 *                    the stream is stored in dfile
 *
 * dfname is declared on a line that is not visible in this view; as in the
 * original code, sizeof(dfname) is taken to be its capacity in bytes.
 * NOTE(review): the result of fopen is not checked in the visible lines.
 */
299 set_shuffle(struct mngmsg m_msg) {
302 if (m_msg.shuff_wid > 0) w_flag = m_msg.shuff_wid;
303 if (m_msg.shuff_max > shuff_cnt) shuff_cnt = m_msg.shuff_max;
306 if (m_msg.dfile[0]!='\0') {
/* strncpy leaves the buffer unterminated when the source is too long, so
   copy at most capacity-1 bytes and terminate explicitly. */
307 strncpy(dfname,m_msg.dfile,sizeof(dfname)-1);
dfname[sizeof(dfname)-1]='\0';
/* The third argument of strncat is the number of characters that may be
   appended, not the size of the destination; pass the space that is left. */
308 strncat(dfname,"_rlib",sizeof(dfname)-strlen(dfname)-1);
309 dfile = fopen(dfname,"w");
/* openlib: prepare the library lname for reading by agetlib.
 *
 *   lname    library file name; saved in libn_save and in the lb_name member
 *   sascii   character-to-residue table stored in the new lmf_str
 *   ldnaseq, quiet, m_fd   not referenced in the lines visible in this view
 *
 * A zero-filled struct lmf_str is allocated, its getlib and ranlib members
 * are set to agetlib and aranlib, and the random number generator is
 * initialised.  The return statement is not visible in this view.
 *
 * NOTE(review): no explicit termination of libn_save after strncpy is
 * visible here; confirm it on the hidden line that follows the copy.
 */
315 openlib(char *lname, int ldnaseq, int *sascii, int quiet, struct lmf_str *m_fd)
317 char rline[10],libn[MAX_FN], *bp;
318 int wcnt, ll, opnflg;
320 struct lmf_str *m_fptr;
325 strncpy(libn_save,lname,sizeof(libn_save));
327 /* now allocate a buffer for the opened text file */
328 if ((m_fptr = calloc(1,sizeof(struct lmf_str)))==NULL) {
/* sizeof yields a size_t, which does not match %ld on every platform;
   convert it to long so the argument agrees with the format. */
329 fprintf(stderr," cannot allocate lmf_str (%ld) for %s\n",
330 (long)sizeof(struct lmf_str),lname);
334 strncpy(m_fptr->lb_name,lname,MAX_FN);
335 m_fptr->lb_name[MAX_FN-1]='\0';
337 m_fptr->sascii = sascii;
338 m_fptr->getlib = agetlib;
339 m_fptr->ranlib = aranlib;
343 irand(0); /* initialize the random number generator */
356 if (dfile) fclose(dfile);
/* desc_save: heap copy of the description read by agetlib; reused for every
   shuffled sequence that is returned afterwards. */
361 static char *desc_save;
/* agetlib: return the library sequence, then shuffled copies of it.
 *
 * One branch reads the sequence with getseq, copies its description into
 * libstr and saves both the sequence (aa_save) and the description
 * (desc_save).  The other branch returns a shuffled copy of the saved
 * sequence in seq, terminated with EOSEQ, and returns -1 once shuff_cnt is
 * used up.  The condition that selects the branch, part of the parameter
 * list and the normal return statement are not visible in this view.
 *
 * NOTE(review): after the failed-allocation message for aa_save, confirm on
 * the hidden lines that execution does not continue into memcpy.
 */
364 agetlib(unsigned char *seq,
370 struct lmf_str *lf_fd,
/* Read the sequence; a result below 1 is handled on a line not shown here. */
382 if ((n1_save = getseq(libn_save,lf_fd->sascii,
383 seq,maxs,lib_desc,sizeof(lib_desc),&sq1_off)) < 1)
386 strncpy(libstr,lib_desc,n_libstr);
387 libstr[n_libstr-1]='\0';
/* Keep an unshuffled copy of the sequence as the source for later shuffles. */
389 if ((aa_save = (unsigned char *)calloc(n1_save+1,sizeof(unsigned char)))==
390 NULL) fprintf(stderr," cannot allocate %d for saved sequence\n",
392 memcpy((void *)aa_save,(void *)seq,n1_save);
395 (char *)calloc(strlen(lib_desc)+1,sizeof(char)))== NULL) {
396 fprintf(stderr," cannot allocate saved desciption [%d]\n",
/* Copy the description and terminate it.  The original line used "==", a
   comparison whose result was discarded; an assignment was intended. */
400 strncpy (desc_save,lib_desc,strlen(lib_desc));
401 desc_save[strlen(lib_desc)]='\0';
407 else { /* return a shuffled sequence - here we need a window size; */
408 strncpy(libstr,desc_save,n_libstr);
409 libstr[n_libstr-1]='\0';
/* Stop after the configured number of shuffles. */
411 if (shuff_cnt-- <= 0 ) return -1;
/* A positive w_flag selects the windowed shuffle with that window size. */
412 if (w_flag > 0) wshuffle(aa_save,seq,n1_save,w_flag,&ieven);
413 else shuffle(aa_save,seq,n1_save);
414 seq[n1_save] = EOSEQ;
/* Write the shuffled sequence to dfile: a ">count" header, then the residues
   translated through aa with a newline after every 60 characters.  Whether
   this is guarded by a test of dfile is not visible in this view. */
417 fprintf(dfile,">%d\n",shuff_cnt);
418 for (i=0; i<n1_save; i++) {
419 if (aa[seq[i]]>0) fputc(aa[seq[i]],dfile);
420 else {fprintf(stderr,"error aa0[%d]: %d %d\n",
421 i,seq[i],aa[seq[i]]);}
422 if (i%60 == 59) fputc('\n',dfile);
437 struct lmf_str *lm_fd)
442 if (use_stdin == 2) {
443 if (llibstr1[0]=='>' || llibstr1[0]==';') {
444 strncpy(str,llibstr1+1,cnt);
447 strncpy(str,llibstr1,cnt);
451 strncpy(str,desc_save,cnt);
454 if ((bp = strchr(str,'\001'))!=NULL) *bp='\0';
455 else if ((bp = strchr(str,'\n'))!=NULL) *bp='\0';
456 else str[cnt-1]='\0';
/* revcomp: reverse-complement the n residue codes in seq in place, using
 * c_nt as the table that maps each code to its complement.
 *
 * NOTE(review): only part of the body is visible; the statements that save
 * seq[i] and store into seq[ni] are presumably on the hidden lines.
 */
461 revcomp(unsigned char *seq, int n, int *c_nt)
/* i walks up from the start and ni down from the end, for n/2 steps. */
467 for (i=0, ni = n-1; i< n/2; i++,ni--) {
469 seq[i] = c_nt[seq[ni]];
/* Complements a single element in place; presumably the middle element of an
   odd-length sequence - the guarding condition is not visible. */
474 seq[i] = c_nt[seq[i]];
480 re_openlib(struct lmf_str *om_fptr, int outtty)
485 int re_getlib(unsigned char *aa1, int n1, int maxt3, int loff, int cont,
486 int term_code, long *loffset, long *l_off,
487 struct lmf_str *m_file_p)