1 /* copyright (c) 1996, 1997, 1998, 1999, 2002 William R. Pearson and the
4 /* $Name: fa_34_26_5 $ - $Id: comp_lib.c,v 1.100 2007/04/26 18:36:36 wrp Exp $ */
7 * Concurrent read version
9 * Feb 20, 1998 modifications for prss3
11 * December, 1998 - DNA searches are now down with forward and reverse
27 #include <sys/types.h>
34 #include "mw.h" /* defines beststr */
35 #include "structs.h" /* mngmsg, libstruct */
36 #include "param.h" /* pstruct, thr_str, buf_head, rstruct */
41 char *mp_verstr="34.26";
43 /********************************/
44 /* global variable declarations */
45 /********************************/
46 char gstring2[MAX_STR]; /* string for label */
47 char gstring3[MAX_STR];
48 char hstring1[MAX_STR];
50 extern int max_workers;
55 extern int sfn_cmp(int *q, int *s);
60 /********************************/
61 /* extern variable declarations */
62 /********************************/
63 extern char *prog_func; /* function label */
64 extern char *verstr, *iprompt0, *iprompt1, *iprompt2, *refstr;
66 /********************************/
67 /*extern function declarations */
68 /********************************/
69 /* open sequence file (getseq.c) */
70 extern int getseq(char *filen, int *sascii,
71 unsigned char *seq, int maxs,
72 char *libstr, int n_libstr,
75 struct lmf_str *openlib(char *, int, int *, int, struct lmf_str *);
77 void set_shuffle(struct mngmsg m_msg);
78 void closelib(struct lmf_str *m_fptr);
83 extern int ann_scan(unsigned char *, int, struct mngmsg *, int );
84 extern int scanseq(unsigned char *seq, int n, char *str);
85 extern void re_ascii(int *qascii, int *sascii);
86 extern int recode(unsigned char *seq, int n, int *qascii, int nsq);
87 extern void revcomp(unsigned char *seq, int n, int *c_nt);
89 extern void init_ascii(int is_ext, int *sascii, int is_dna);
90 extern void qshuffle(unsigned char *aa0, int n0, int nm0);
91 extern void free_pam2p(int **);
93 /* initialize environment (doinit.c) */
94 extern void initenv (int argc, char **argv, struct mngmsg *m_msg,
95 struct pstruct *ppst, unsigned char **aa0);
97 /* print timing information */
98 extern void ptime (FILE *, time_t);
101 #define QGETLIB (q_file_p->getlib)
104 #define GETLIB (m_file_p->getlib)
106 /* calculation functions */
108 init_work(unsigned char *aa0, int n0,
109 struct pstruct *ppst, void **f_arg );
112 do_work(unsigned char *aa0, int n0, unsigned char *aa1, int n1, int frame,
113 struct pstruct *ppst, void *f_str, int qr_flg, struct rstruct *rst);
117 close_work(unsigned char *aa0, int n0, struct pstruct *ppst, void **f_arg);
119 get_param (struct pstruct *pstr, char *pstring1, char *pstring2);
124 save_best(struct buf_head *cur_buf, struct mngmsg, struct pstruct pst,
125 FILE *fdata, int *, struct hist_str *, void **);
128 save_best(struct buf_head *cur_buf, struct mngmsg, struct pstruct pst,
129 FILE *fdata, int *, struct hist_str *, void **, int *, int *);
133 /* statistics functions */
135 process_hist(struct stat_str *sptr, int nstat,
138 struct hist_str *hist, void **, int);
139 extern void addhistz(double, struct hist_str *); /* scaleswn.c */
140 void selectbestz(struct beststr **, int, int );
141 extern double (*find_zp)(int score, double escore, int length, double comp,void *);
143 void last_stats(const unsigned char *, int,
144 struct stat_str *sptr, int nstats,
145 struct beststr **bestp_arr, int nbest,
146 struct mngmsg m_msg, struct pstruct pst,
147 struct hist_str *histp, void *);
149 int last_calc( unsigned char **a0, unsigned char *a1, int maxn,
150 struct beststr **bestp_arr, int nbest,
151 struct mngmsg m_msg, struct pstruct *ppst,
152 void **f_str, void *rs_str);
154 void scale_scores(struct beststr **bestp_arr, int nbest,
155 struct db_str,struct pstruct pst, void *);
158 extern int shuffle(unsigned char *, unsigned char *, int);
159 extern int wshuffle(unsigned char *, unsigned char *, int, int, int *);
162 extern void set_db_size(int, struct db_str *, struct hist_str *);
164 /* display functions */
166 showbest (FILE *fp, unsigned char **aa0, unsigned char *aa1,
167 int maxn, struct beststr **bestp_arr, int nbest,
168 int qlib, struct mngmsg *m_msg,struct pstruct pst,
169 struct db_str db, char *gstring2, void **f_str);
172 showalign (FILE *fp, unsigned char **aa0, unsigned char *aa1,
173 int maxn, struct beststr **bestp_arr, int nbest,
174 int qlib, struct mngmsg m_msg,struct pstruct pst,
175 char *gstring2, void **f_str);
178 void h_init(struct pstruct *, struct mngmsg *, char *); /* doinit.c */
179 void last_init(struct mngmsg *, struct pstruct *); /* initfa/sw.c */
180 void last_params(unsigned char *, int, struct mngmsg *, struct pstruct *);
182 void s_abort(char *, char *); /* compacc.c */
185 void resetp(struct mngmsg *, struct pstruct *);
187 void gettitle(char *, char *, int); /* nxgetaa.c */
188 void libchoice(char *lname, int, struct mngmsg *); /* lib_sel.c */
189 void libselect(char *lname, struct mngmsg *); /* lib_sel.c */
190 void query_parm(struct mngmsg *, struct pstruct *); /* initfa/sw.c */
191 void selectbestz(struct beststr **, int, int);
194 void prhist(FILE *, struct mngmsg, struct pstruct,
195 struct hist_str hist, int nstats, struct db_str, char *);
196 void printsum(FILE *, struct db_str db);
197 int reset_maxn(struct mngmsg *, int); /* set m_msg.maxt, maxn from maxl */
199 FILE *outfd; /* Output file */
201 /* this information is global for fsigint() */
202 extern time_t s_time(); /* fetches time */
203 time_t tstart, tscan, tprev, tdone; /* Timing */
205 time_t ttscan, ttdisp;
207 time_t tdstart, tddone;
209 static struct db_str qtt = {0l, 0l, 0};
212 /***************************************/
213 /* thread global variable declarations */
214 /***************************************/
216 /* functions for getting/sending buffers to threads (thr_sub.c) */
217 extern void init_thr(int , struct thr_str *, struct mngmsg, struct pstruct *,
218 unsigned char *, int);
219 extern void start_thr(void);
220 extern void get_rbuf(struct buf_head **cur_buf, int max_wor_buf);
221 extern void put_rbuf(struct buf_head *cur_buf, int max_work_buf);
222 extern void put_rbuf_done(int nthreads, struct buf_head *cur_buf,
226 struct buf_head buf_list[NUM_WORK_BUF];
229 /* these variables must be global for comp_thr.c so that savebest()
232 static struct beststr
233 *best, /* array of best scores */
235 **bestp_arr; /* array of pointers */
236 static int nbest; /* number of best scores */
238 static struct stat_str *stats, *qstats; /* array of scores for statistics */
240 /* these variables are global so they can be set both by the main()
241 program and savebest() in threaded mode.
243 static int nstats, nqstats, kstats;
244 static double zbestcut; /* cut off for best z-score */
245 static int bestfull; /* index for selectbest() */
246 static int stats_done=0; /* flag for z-value processing */
250 main (int argc, char *argv[])
252 unsigned char *aa0[6], *aa0s, *aa1, *aa1ptr, *aa1s;
253 int n1, n1s; /* n1s needed for PRSS so that when getlib() returns -1 (because no more
254 library sequences, we have a valid n1 for shuffling */
256 int *n1tot_ptr=NULL, *n1tot_cur;
258 int n1tot_v, aa1_loff;
260 long qoffset; /* qoffset is the equivalent of loffset */
261 /* m_msg.sq0off is the l_off equivalent */
263 long loffset, l_off; /* loffset is the coordinate of first residue
264 when lcont > 0; l_off is not used in the
265 main loop, only in showbest and showalign */
266 char lib_label[MAX_FN];
267 char pgm_abbr[MAX_SSTR];
270 char q_bline[MAX_STR];
273 struct lmf_str *q_file_p;
274 int sstart, sstop, is;
277 struct lmf_str *m_file_p;
279 int t_best, t_rbest, t_qrbest; /* best score of two/six frames */
280 double t_escore, t_rescore, t_qrescore; /* best evalues of two/six frames */
288 void *f_str[6], *qf_str; /* different f_str[]'s for different
289 translation frames, or forward,reverse */
294 int max_buf_cnt, ave_seq_len, buf_siz;
296 struct buf_head *cur_buf;
297 struct buf_str *cur_buf_p;
299 struct thr_str *work_info;
302 struct mngmsg m_msg; /* Message from host to manager */
303 int iln, itt; /* index into library names */
305 char argv_line[MAX_STR];
308 struct rstruct rst; /* results structure */
309 struct rstruct rrst; /* results structure for shuffle*/
312 FILE *fdata=NULL; /* file for full results */
313 char libstr[MAX_UID]; /* string for labeling full results */
314 char *libstr_p; /* choose between libstr and ltitle */
315 int n_libstr; /* length of libstr */
317 int leng; /* leng is length of the descriptive line */
318 int maxn; /* size of the library sequence examined */
319 int maxl; /* size of library buffer */
320 fseek_t lmark; /* seek into library of current sequence */
321 int qlcont; /* continued query sequence */
322 int lcont, ocont, maxt; /* continued sequence */
323 int igncnt=0; /* count for ignoring sequences warning */
324 int ieven=0; /* tmp for wshuffle */
325 double zscore; /* tmp value */
326 char *bp; /* general purpose string ptr */
331 m_msg.quiet= !isatty(1);
337 argv_line[0]='#'; argv_line[1]='\0';
338 for (i=0; i<argc; i++) {
339 strncat(argv_line," ",sizeof(argv_line)-strlen(argv_line)-1);
340 if (strchr(argv[i],' ')) {
341 strncat(argv_line,"\"",sizeof(argv_line)-strlen(argv_line)-1);
342 strncat(argv_line,argv[i],sizeof(argv_line)-strlen(argv_line)-1);
343 strncat(argv_line,"\"",sizeof(argv_line)-strlen(argv_line)-1);
346 strncat(argv_line,argv[i],sizeof(argv_line)-strlen(argv_line)-1);
349 argv_line[sizeof(argv_line)-1]='\0';
352 /* first initialization routine - nothing is known */
353 h_init(&pst, &m_msg, pgm_abbr);
355 m_msg.db.length = qtt.length = 0l;
356 m_msg.db.entries = m_msg.db.carry = qtt.entries = qtt.carry = 0;
357 m_msg.pstat_void = NULL;
358 m_msg.hist.entries = 0;
360 for (iln=0; iln<MAX_LF; iln++) m_msg.lb_mfd[iln]=NULL;
362 f_str[0] = f_str[1] = NULL;
365 /* second initialiation - get commmand line arguments */
366 initenv (argc, argv, &m_msg, &pst,&aa0[0]);
369 /* now have max_workers - allocate work_info[] */
370 if (max_workers >= MAX_WORKERS) max_workers = MAX_WORKERS;
372 (struct thr_str *)calloc(max_workers,sizeof(struct thr_str)))==NULL) {
373 fprintf(stderr, " cannot allocate work_info[%d]\n",max_workers);
381 /* label library size limits */
382 if (m_msg.n1_low > 0 && m_msg.n1_high < BIGNUM)
383 sprintf(lib_label,"library (range: %d-%d)",m_msg.n1_low,m_msg.n1_high);
384 else if (m_msg.n1_low > 0)
385 sprintf(lib_label,"library (range: >%d)",m_msg.n1_low);
386 else if (m_msg.n1_high < BIGNUM)
387 sprintf(lib_label,"library (range: <%d)",m_msg.n1_high);
389 strncpy(lib_label,"library",sizeof(lib_label));
391 sprintf(lib_label,"shuffled sequence");
393 lib_label[sizeof(lib_label)-1]='\0';
395 tstart = tscan = s_time();
396 tdstart = time(NULL);
398 /* Allocate space for the query and library sequences */
399 /* pad aa0[] with an extra 32 chars for ALTIVEC padding */
401 if ((aa0[0] = (unsigned char *)malloc((m_msg.max_tot+1+32)*sizeof(unsigned char)))
403 s_abort ("Unable to allocate query sequence", "");
407 aa0[5]=aa0[4]=aa0[3]=aa0[2]=aa0[1]=aa0[0];
409 /* make room for random sequence -
410 also used as storage for COMP_THR library overlaps
412 if ((aa1s = (unsigned char *)malloc((m_msg.max_tot+1+32)*sizeof (char))) == NULL) {
413 s_abort ("Unable to allocate shuffled library sequence", "");
420 if (m_msg.markx & MX_HTML) {
422 fprintf(stdout,"<html>\n<head>\n<title>%s Results</title>\n</head>\n<body>\n",prog_func);
424 fprintf(stdout,"<pre>\n");
428 fputs(argv_line,stdout);
432 fprintf(stdout,"%s\n",iprompt0);
433 fprintf(stdout," %s%s\n",verstr,refstr);
434 if (m_msg.markx & MX_HTML) fputs("</pre>\n",stdout);
437 if (m_msg.tname[0] == '\0') {
438 if (m_msg.quiet == 1)
439 s_abort("Query sequence undefined","");
440 l1: fputs (iprompt1, stdout);
442 if (fgets (m_msg.tname, MAX_FN, stdin) == NULL)
443 s_abort ("Unable to read query library name","");
444 m_msg.tname[MAX_FN-1]='\0';
445 if ((bp=strchr(m_msg.tname,'\n'))!=NULL) *bp='\0';
446 if (m_msg.tname[0] == '\0') goto l1;
449 /* Fetch first sequence */
453 /* Open query library */
454 if ((q_file_p= openlib(m_msg.tname, m_msg.qdnaseq,qascii,!m_msg.quiet,NULL))==NULL) {
455 s_abort(" cannot open library ",m_msg.tname);
459 QGETLIB (aa0[0], MAXTST, m_msg.qtitle, sizeof(m_msg.qtitle),
460 &qseek, &qlcont,q_file_p,&m_msg.sq0off);
461 if ((bp=strchr(m_msg.qtitle,' '))!=NULL) *bp='\0';
462 strncpy(qlabel,m_msg.qtitle,sizeof(qlabel));
463 if (bp != NULL) *bp = ' ';
464 qlabel[sizeof(qlabel)-1]='\0';
466 /* if annotations are included in sequence, remove them */
468 m_msg.n0 = ann_scan(aa0[0],m_msg.n0,&m_msg,m_msg.qdnaseq);
471 if (m_msg.term_code && !(m_msg.qdnaseq==SEQT_DNA || m_msg.qdnaseq==SEQT_RNA) &&
472 aa0[0][m_msg.n0-1]!='*') {
473 aa0[0][m_msg.n0++]='*';
477 /* check for subset */
478 if (q_file_p->opt_text[0]!='\0') {
479 if (q_file_p->opt_text[0]=='-') {
480 sstart=0; sscanf(&q_file_p->opt_text[1],"%d",&sstop);
483 sscanf(&q_file_p->opt_text[0],"%d-%d",&sstart,&sstop);
485 if (sstop <= 0 ) sstop = BIGNUM;
487 for (id=0,is=sstart; is<min(m_msg.n0,sstop); ) aa0[0][id++]=aa0[0][is++];
489 m_msg.n0 = min(m_msg.n0,sstop)-sstart;
490 if (m_msg.sq0off==1) m_msg.sq0off = sstart+1;
493 #if defined(SW_ALTIVEC) || defined(SW_SSE2)
494 /* for ALTIVEC, must pad with 15 NULL's */
495 for (id=0; id<SEQ_PAD; id++) {aa0[0][m_msg.n0+id]=0;}
499 qoffset += m_msg.n0 - m_msg.sq0off;
506 m_msg.n0 = getseq (m_msg.tname, qascii, aa0[0], m_msg.max_tot,
507 m_msg.qtitle, sizeof(m_msg.qtitle),
509 strncpy(qlabel,m_msg.tname,sizeof(qlabel));
510 qlabel[sizeof(qlabel)-1]='\0';
512 /* if annotations are included in sequence, remove them */
514 m_msg.n0 = ann_scan(aa0[0],m_msg.n0,&m_msg,m_msg.qdnaseq);
518 if (m_msg.n0 > MAXTST) {
519 fprintf(stderr," sequence truncated to %d\n %s\n",MAXTST,m_msg.sqnam);
520 fprintf(stdout," sequence truncated to %d\n %s\n",MAXTST,m_msg.sqnam);
525 if (m_msg.qdnaseq == SEQT_UNK) {
527 /* do automatic sequence recognition,but only for sequences > 20 residues */
529 (float)scanseq(aa0[0],m_msg.n0,"ACGTUNacgtun")/(float)m_msg.n0 >0.85) {
531 m_msg.qdnaseq = SEQT_DNA;
533 else { /* its protein */
535 m_msg.qdnaseq = SEQT_PROT;
537 /* modify qascii to use encoded version
538 cannot use memcpy() because it loses annotations
540 re_ascii(qascii,pascii);
541 init_ascii(pst.ext_sq_set,qascii,m_msg.qdnaseq);
542 m_msg.n0 = recode(aa0[0],m_msg.n0,qascii, pst.nsqx);
546 s_abort ("Query sequence length <= 0: ", m_msg.tname);
549 m_msg.nqsfnum = nsfnum;
550 for (i=0; i <= nsfnum & i<10; i++) m_msg.qsfnum[i] = sfnum[i];
551 m_msg.nqsfnum_n = nsfnum_n;
552 for (i=0; i <= nsfnum_n & i<10; i++) m_msg.qsfnum_n[i] = sfnum_n[i];
555 resetp (&m_msg, &pst);
558 gettitle(m_msg.tname,m_msg.qtitle,sizeof(m_msg.qtitle));
559 if (m_msg.tname[0]=='-' || m_msg.tname[0]=='@') {
560 strncmp(m_msg.tname,m_msg.qtitle,sizeof(m_msg.tname));
561 if ((bp=strchr(m_msg.tname,' '))!=NULL) *bp='\0';
565 /* get library file names */
568 if (strlen (m_msg.lname) == 0) {
569 if (m_msg.quiet == 1) s_abort("Library name undefined","");
570 libchoice(m_msg.lname,sizeof(m_msg.lname),&m_msg);
573 libselect(m_msg.lname, &m_msg);
575 if (strlen (m_msg.lname) == 0) {
576 if (m_msg.quiet == 1) s_abort("Shuffle sequence undefined","");
577 l2: fputs(iprompt2,stdout);
579 if (fgets (m_msg.lname, MAX_FN, stdin) == NULL)
580 s_abort ("Unable to read shuffle file name","");
581 m_msg.lname[MAX_FN-1]='\0';
582 if ((bp=strchr(m_msg.lname,'\n'))!=NULL) *bp='\0';
583 if (m_msg.lname[0] == '\0') goto l2;
585 m_msg.lbnames[0]= m_msg.lname;
590 /* Get additional parameters here */
591 if (!m_msg.quiet) query_parm (&m_msg, &pst);
593 last_init(&m_msg, &pst);
595 /* Allocate space for saved scores */
597 (struct beststr *)calloc((MAXBEST+1),sizeof(struct beststr)))==NULL)
598 s_abort ("Cannot allocate best struct","");
600 (struct beststr **)malloc((MAXBEST+1)*sizeof(struct beststr *)))==NULL)
601 s_abort ("Cannot allocate bestp_arr","");
603 /* Initialize bestp_arr */
604 for (nbest = 0; nbest < MAXBEST+1; nbest++)
605 bestp_arr[nbest] = &best[nbest];
607 best[-1].score[0]=best[-1].score[1]=best[-1].score[2]= INT_MAX;
608 best[-1].zscore=FLT_MAX; /* for Z-scores, bigger is best */
609 best[-1].escore=FLT_MIN; /* for E()-values, lower is best */
612 (struct stat_str *)calloc(MAXSTATS,sizeof(struct stat_str)))==NULL)
613 s_abort ("Cannot allocate stats struct","");
616 /* set up signals now that input is done */
617 signal(SIGHUP,SIG_IGN);
621 /* Set up buffers for reading the library:
623 We will start by using a 2 Mbyte buffer for each worker. For
624 proteins, that means 5,000 sequences of length 400 (average).
625 For DNA, that means 2,000 sequences of length 1000. At the
626 moment, those are good averages.
629 if (m_msg.ldnaseq== SEQT_DNA) {
630 max_buf_cnt = MAX_NT_BUF;
631 ave_seq_len = AVE_NT_LEN;
634 max_buf_cnt = MAX_AA_BUF;
635 ave_seq_len = AVE_AA_LEN;
638 /* however - buffer sizes should be a function of the number of
639 workers so that all the workers are kept busy. Assuming a 10,000
640 entry library is the smallest we want to schedule, then
643 if (max_buf_cnt > 10000/max_workers)
644 max_buf_cnt = 10000/(2*max_workers);
646 max_buf_cnt /= m_msg.thr_fact;
648 /* finally, max_work_buf should be mod 6 for tfasta */
649 max_buf_cnt -= (max_buf_cnt % 6);
651 max_work_buf = 2*max_workers;
653 /* allocate space for library buffers and results */
655 buf_siz=max_buf_cnt*ave_seq_len;
656 if (buf_siz < m_msg.max_tot) buf_siz = m_msg.max_tot;
657 for (i=0; i<max_work_buf; i++) {
658 if ((buf_list[i].buf =(struct buf_str *)calloc((size_t)(max_buf_cnt+1),
659 sizeof(struct buf_str)))
661 fprintf(stderr," cannot allocate buffer struct %d %d\n",i,max_buf_cnt+1);
664 buf_list[i].buf_cnt=0;
665 buf_list[i].have_results=0;
666 if ((buf_list[i].start =
667 (unsigned char *)calloc((size_t)(buf_siz),sizeof(unsigned char)))
669 fprintf(stderr," cannot allocate buffer %d\n",i);
673 /* make certain there is a '\0' at the beginning */
676 reader_buf[i] = &buf_list[i];
679 /* initialization of global variables for threads/buffers */
682 num_reader_bufs = max_work_buf;
684 worker_buf_workp = 0;
685 worker_buf_readp = 0;
686 reader_buf_workp = 0;
687 reader_buf_readp = 0;
689 start_thread = 1; /* keeps threads from starting */
692 /* Label the output */
693 if ((bp = (char *) strchr (m_msg.lname, ' ')) != NULL) *bp = '\0';
694 if (m_msg.ltitle[0] == '\0') {
695 strncpy(m_msg.ltitle,m_msg.lname,sizeof(m_msg.ltitle));
696 m_msg.ltitle[sizeof(m_msg.ltitle)-1]='\0';
700 printf("Query library %s vs %s library\n", m_msg.tname,m_msg.lname);
701 if (m_msg.nln > 0) printf("searching %s library\n\n",m_msg.lbnames[0]);
706 m_msg.db.length = 0l;
707 m_msg.db.entries = m_msg.db.carry = 0;
712 maxl = m_msg.max_tot - m_msg.n0 -2; /* maxn = max library sequence space */
714 maxn = reset_maxn(&m_msg,maxl);
722 /* get the last parameters */
723 last_params(aa0[0],m_msg.n0, &m_msg, &pst);
726 if our function returns approximate E()-scores, we do not need to
727 work with raw scores and later calculate z-scores. When
728 approx. E()-scores are calculated, we still need various
729 statistics structures, but we can get them immediately. In this
730 case, find_zp() must produce a z_score (large positive is good)
734 if (m_msg.escore_flg) {
735 pst.zsflag_f = process_hist(stats,nstats,m_msg,pst,
736 &m_msg.hist,&m_msg.pstat_void,0);
741 if (m_msg.qshuffle) {
742 if ((aa0s=(unsigned char *)calloc(m_msg.n0+2,sizeof(char)))==NULL) {
743 fprintf(stderr,"cannot allocate aa0s[%d]\n",m_msg.n0+2);
748 memcpy(aa0s,aa0[0],m_msg.n0);
749 qshuffle(aa0s,m_msg.n0,m_msg.nm0);
752 /* previous versions of FASTA have stored the reverse complement in
753 the same array as the forward query sequence. This version
754 changes that, by allocating separate space for the reverse complement,
755 and thus reducing the demand for a large MAXLIB/MAXTRN for long queries
757 if (m_msg.qframe == 2) {
758 if ((aa0[1]=(unsigned char *)calloc(m_msg.n0+2,sizeof(char)))==NULL) {
759 fprintf(stderr,"cannot allocate aa0[1][%d]\n",m_msg.n0+2);
764 memcpy(aa0[1],aa0[0],m_msg.n0+1);
765 revcomp(aa0[1],m_msg.n0,&pst.c_nt[0]);
767 /* set aa1 for serial - threaded points aa1 to buffer */
769 aa1 = aa0[0] + m_msg.n0+1; /* modified now that aa0[1] is done separately */
772 init_thr(max_workers, work_info, m_msg, &pst, aa0[0], max_work_buf);
775 if (m_msg.qshuffle && qstats==NULL) {
777 (struct stat_str *)calloc(m_msg.shuff_max+1,sizeof(struct stat_str)))==NULL)
778 s_abort ("Cannot allocate qstats struct","");
782 if (m_msg.markx & MX_HTML) fputs("<pre>\n",stdout);
784 /* rline[] is a tmp string */
785 if (m_msg.qdnaseq == SEQT_DNA || m_msg.qdnaseq == SEQT_RNA) {
786 strncpy(rline,(m_msg.qframe==1)? " (forward-only)" : "\0",sizeof(rline));
787 rline[sizeof(rline)-1]='\0';
791 leng = (int)strlen(m_msg.qtitle);
792 if (leng > 50) leng -= 10;
794 sprintf (&m_msg.qtitle[leng], " %d %s", m_msg.n0, m_msg.sqnam);
799 printf("%3d>>>%s - %d %s%s\n vs %.60s %s\n", qlib,
800 m_msg.qtitle, m_msg.n0, m_msg.sqnam,
801 (m_msg.revcomp ? " (reverse complement)" : rline),
802 m_msg.ltitle,lib_label);
804 printf("%.50s: %d %s%s\n %s\n vs %.60s %s\n",
805 qlabel, m_msg.n0, m_msg.sqnam,
806 (m_msg.revcomp ? " (reverse complement)" : rline),
807 m_msg.qtitle,m_msg.ltitle,lib_label);
809 libstr_p = &libstr[0];
810 n_libstr=sizeof(libstr);
812 libstr_p = &m_msg.ltitle[0];
813 n_libstr= sizeof(m_msg.ltitle);
814 set_shuffle(m_msg); /* set count/width parameters in llgetaa.c */
821 if (m_msg.dfile[0] && (fdata=fopen(m_msg.dfile,"w"))!=NULL)
822 fprintf(fdata,"%3d\t%-50s\n",m_msg.n0,m_msg.qtitle);
824 qtt.length += m_msg.n0;
830 /* now open the library and start reading */
831 /* get a buffer and fill it up */
832 get_rbuf(&cur_buf,max_work_buf);
834 cur_buf->buf_cnt = 0;
835 cur_buf->have_results = 0;
836 cur_buf->buf[0].aa1b = cur_buf->start;
839 #else /* ! COMP_THR */
840 /* initialize the comparison function, returning f_str */
841 init_work (aa0[0], m_msg.n0, &pst, &f_str[0]);
844 f_str[5] = f_str[4] = f_str[3] = f_str[2] = f_str[1] = f_str[0];
845 if (m_msg.qframe == 2) {
846 init_work ( aa0[1], m_msg.n0, &pst, &f_str[1]);
848 if (m_msg.qshuffle) {
849 init_work ( aa0s, m_msg.n0, &pst, &qf_str);
851 #endif /* COMP_THR */
853 /* open the library - start the search */
855 for (iln = 0; iln < m_msg.nln; iln++) {
856 if ((m_msg.lb_mfd[iln] = m_file_p=
857 openlib(m_msg.lbnames[iln], m_msg.ldnaseq, lascii, !m_msg.quiet, m_msg.lb_mfd[iln]))
859 fprintf(stderr," cannot open library %s\n",m_msg.lbnames[iln]);
862 #if !defined(PRSS) && !defined(COMP_MLIB)
864 printf ("searching %s %s\n",m_msg.lbnames[iln],lib_label);
870 n1tot_v = n1tot_cnt = 0;
871 n1tot_cur = n1tot_ptr = NULL;
873 /* get next buffer to read into */
879 /* read sequence directly into buffer */
880 aa1ptr = aa1 = cur_buf->buf[nseq].aa1b;
883 while ((n1=GETLIB(aa1ptr,maxt,libstr_p,n_libstr,&lmark,&lcont,m_file_p,&l_off))>=0) {
885 if (n_libstr <= MAX_UID) {
886 if ((bp=strchr(libstr_p,' '))!=NULL) *bp='\0';
889 if (m_msg.term_code && !lcont &&
890 m_msg.ldnaseq==SEQT_PROT && aa1ptr[n1-1]!=m_msg.term_code) {
891 aa1ptr[n1++]=m_msg.term_code;
895 #if defined(SW_ALTIVEC) || defined(SW_SSE2)
896 /* for ALTIVEC, must pad with 15 NULL's */
897 for (id=0; id<SEQ_PAD; id++) {aa1ptr[n1+id]=0;}
901 if (aa1[-1]!='\0' || aa1ptr[n1]!='\0') {
902 fprintf(stderr,"%s: aa1[%d] missing NULL boundaries: %d %d\n",libstr_p,n1,aa1[-1],aa1ptr[n1]);
906 /* check for a continued sequence and provide a pointer to
907 the n1_tot array if lcont || ocont */
909 if (lcont && !ocont) { /* get a new pointer */
910 if (n1tot_cnt <= 0) {
911 if ((n1tot_ptr=calloc(1000,sizeof(int)))==NULL) {
912 fprintf(stderr," cannot allocate n1tot_ptr\n");
915 else {n1tot_cnt=1000;}
918 n1tot_cur = n1tot_ptr++;
921 if (n1tot_v < m_msg.n1_low || n1tot_v > m_msg.n1_high) {
926 m_msg.db.length += n1;
927 if (m_msg.db.length > LONG_MAX) {
928 m_msg.db.length -= LONG_MAX; m_msg.db.carry++;
932 /* This finds most reasons for core dumps */
935 if (aa1[i]>=pst.nsqx)
937 "%s residue[%d/%d] %d range (%d) lcont/ocont: %d/%d\n%s\n",
938 libstr,i,n1,aa1[i],pst.nsqx,lcont,ocont,aa1ptr+i);
945 /* don't count long sequences more than once */
946 if (aa1!=aa1ptr) {n1 += m_msg.loff; m_msg.db.entries--;}
950 if (m_msg.db.entries % 200 == 199) {
952 if (m_msg.db.entries % 10000 == 9999) fputc('\n',stderr);
953 else if (m_msg.db.entries % 1000 == 999) fputc(' ',stderr);
960 fprintf(stderr,"Ignoring: %s\n",libstr);
968 memcpy(aa1s,aa1,n1s);
974 /* if COMP_THR - fill and empty buffers */
978 for (itt=m_msg.revcomp; itt<=m_msg.nitt1; itt++) {
981 cur_buf_p = &(cur_buf->buf[nseq++]);
983 cur_buf_p->n1tot_p = n1tot_cur;
984 cur_buf_p->lseek = lmark;
985 cur_buf_p->cont = ocont+1;
986 cur_buf_p->m_file_p = (void *)m_file_p;
987 cur_buf_p->frame = itt;
988 memcpy(cur_buf_p->libstr,libstr,MAX_UID);
990 cur_buf_p->nsfnum = nsfnum;
991 if ((cur_buf_p->sfnum[0]=sfnum[0])>0 &&
992 (cur_buf_p->sfnum[1]=sfnum[1])>0 &&
993 (cur_buf_p->sfnum[2]=sfnum[2])>0 &&
994 (cur_buf_p->sfnum[3]=sfnum[3])>0 &&
995 (cur_buf_p->sfnum[4]=sfnum[4])>0 &&
996 (cur_buf_p->sfnum[5]=sfnum[5])>0 &&
997 (cur_buf_p->sfnum[6]=sfnum[6])>0 &&
998 (cur_buf_p->sfnum[7]=sfnum[7])>0 &&
999 (cur_buf_p->sfnum[8]=sfnum[8])>0 &&
1000 (cur_buf_p->sfnum[9]=sfnum[9])>0) ;
1003 /* this assumes that max_buf_cnt is guaranteed %6=0 so that
1004 additional pointers to the same buffer can be used
1005 nseq now points to next buffer
1008 cur_buf->buf[nseq].aa1b = cur_buf->buf[nseq-1].aa1b;
1011 /* make a copy of the overlap (threaded only) */
1013 memcpy(aa1s,&aa1[n1-m_msg.loff],m_msg.loff);
1016 /* if the buffer is filled */
1017 if (nseq >= max_buf_cnt || ntbuff >= buf_siz - maxn) {
1019 /* provide filled buffer to workers */
1020 put_rbuf(cur_buf,max_work_buf);
1022 /* get an empty buffer to fill */
1023 get_rbuf(&cur_buf,max_work_buf);
1025 /* "empty" buffers have results that must be processed */
1026 if (cur_buf->buf_cnt && cur_buf->have_results) {
1027 save_best(cur_buf,m_msg,pst,fdata,m_msg.qsfnum,&m_msg.hist,
1036 /* now the buffer is truly empty, fill it up */
1037 cur_buf->buf_cnt = 0;
1038 cur_buf->have_results = 0;
1039 /* point the first aa1 ptr to the buffer start */
1040 aa1=cur_buf->buf[0].aa1b = cur_buf->start;
1044 else { /* room left in current buffer, increment ptrs */
1045 aa1=cur_buf->buf[nseq].aa1b = cur_buf->buf[nseq-1].aa1b+n1+1;
1047 #else /* if !COMP_THR - do a bunch of searches */
1049 /* t_best and t_rbest are used to save the best score or shuffled
1050 score from all the frames */
1052 t_best = t_rbest = t_qrbest = -1;
1053 t_escore = t_rescore = t_qrescore = FLT_MAX;
1054 for (itt=m_msg.revcomp; itt<=m_msg.nitt1; itt++) {
1056 rst.score[0] = rst.score[1] = rst.score[2] = 0;
1057 do_work (aa0[itt], m_msg.n0,aa1,n1,itt,&pst,f_str[itt],0,&rst);
1059 if (rst.score[pst.score_ix] > t_best) {
1060 t_best = rst.score[pst.score_ix];
1065 "%-12s %5d %6d %d %.5f %.5f %4d %4d %4d %g %d %d %8lld\n",
1068 sfn_cmp(m_msg.qsfnum,sfnum),
1074 rst.score[0],rst.score[1],rst.score[2],
1075 rst.escore, rst.segnum, rst.seglen, lmark);
1081 s_score[0] = rst.score[0];
1082 s_score[1] = rst.score[1];
1083 s_score[2] = rst.score[2];
1088 t_best = t_rbest = rst.score[pst.score_ix];
1089 t_escore = t_rescore = rst.escore;
1091 if (m_msg.qshuffle) {
1092 do_work (aa0s, m_msg.n0,aa1,n1,itt,&pst,qf_str,1,&rrst);
1094 if (rrst.score[pst.score_ix] > t_qrbest)
1095 t_qrbest = rrst.score[pst.score_ix];
1096 if (rrst.escore < t_qrescore)
1097 t_qrescore = rrst.escore;
1099 if (itt==m_msg.nitt1 && nqstats < m_msg.shuff_max) {
1100 qstats[nqstats].n1 = n1; /* save the best score */
1101 qstats[nqstats].comp = rst.comp;
1102 qstats[nqstats].H = rst.H;
1103 qstats[nqstats].escore = t_qrescore;
1104 qstats[nqstats++].score = t_qrbest;
1105 t_qrbest = -1; /* reset t_qrbest, t_qrescore */
1106 t_qrescore = FLT_MAX;
1110 if (pst.zsflag >= 10) {
1111 if (pst.zs_win > 0) wshuffle(aa1,aa1s,n1,pst.zs_win,&ieven);
1112 else shuffle(aa1,aa1s,n1);
1113 do_work (aa0[itt], m_msg.n0, aa1s, n1,itt,&pst,f_str[itt],0,&rrst);
1114 if (rrst.score[pst.score_ix] > t_rbest) {
1115 t_rbest = rrst.score[pst.score_ix];
1116 t_rescore = rrst.escore;
1120 i_score = rst.score[pst.score_ix];
1122 /* this section saves scores for statistics calculations. For
1123 comparisons that can be from one of 2 or 6 frames, it should only
1124 be run once, for the best of the 2 or 6 scores. t_rbest,t_rescore
1125 have the best of the 2 or 6 scores from the frames. For proteins,
1126 this is run for every score.
1129 #ifdef PRSS /* don't save the first score (unshuffled) with PRSS */
1133 if (itt == m_msg.nitt1) {
1134 if (nstats < MAXSTATS) {
1135 stats[nstats].n1 = n1; /* save the best score */
1136 stats[nstats].comp = rst.comp;
1137 stats[nstats].H = rst.H;
1138 if (pst.zsflag >=10) {
1140 t_escore = t_rescore;
1142 stats[nstats].escore = t_escore;
1143 stats[nstats++].score = t_best;
1144 t_best = t_rbest = -1; /* reset t_rbest, t_best */
1145 t_escore = t_rescore = FLT_MAX;
1147 else if (pst.zsflag >= 0) {
1149 pst.zsflag_f = process_hist(stats,nstats,m_msg,pst,
1150 &m_msg.hist,&m_msg.pstat_void,0);
1153 for (i=0; i<MAXBEST; i++) {
1154 bestp_arr[i]->zscore =
1155 (*find_zp)(bestp_arr[i]->score[pst.score_ix],
1156 bestp_arr[i]->escore, bestp_arr[i]->n1,
1157 bestp_arr[i]->comp, m_msg.pstat_void);
1159 zbestcut = bestp_arr[nbest-1]->zscore;
1163 /* older versions saved the first MAXSTATS scores, and ignored the
1164 rest in the statistics. With SAMP_STATS, scores after MAX_STATS
1165 are sampled at random, and included in the sample set and the
1166 statistics parameters are re-derived at the end of the run using
1169 It would be faster not to do the nrand(); if(jstats < MAXSTATS)
1172 if (!m_msg.escore_flg) { /* only for zscores */
1173 jstats = nrand(++kstats); /* no mod % 0 */
1174 if (jstats < MAXSTATS) {
1175 stats[jstats].n1 = n1; /* save the best score */
1176 stats[jstats].comp = rst.comp;
1177 stats[jstats].H = rst.H;
1178 if (pst.zsflag >=10) t_best = t_rbest;
1179 stats[jstats].score = t_best;
1183 } /* ( nstats >= MAXSTATS) && zsflag >= 0 */
1184 } /* itt1 == nitt1 */
1189 /* this section completes work on the current score */
1190 if (stats_done) { /* stats_done > 0 => nstats >= MAXSTATS */
1191 zscore=(*find_zp)(i_score, rst.escore, n1, rst.comp,
1194 if (itt == m_msg.nitt1) {
1195 if (pst.zsflag >= 10) t_best = t_rbest;
1197 addhistz((*find_zp)(t_best, t_escore, n1, rst.comp,
1200 t_best = t_rbest = -1;
1203 else zscore = (double) i_score;
1206 if (zscore > zbestcut ) {
1207 if (nbest >= MAXBEST) {
1208 bestfull = nbest-MAXBEST/4;
1209 selectbestz(bestp_arr,bestfull-1,nbest);
1210 zbestcut = bestp_arr[bestfull-1]->zscore;
1214 bestp = bestp_arr[nbest++];
1215 bestp->score[0] = rst.score[0];
1216 bestp->score[1] = rst.score[1];
1217 bestp->score[2] = rst.score[2];
1218 bestp->comp = rst.comp;
1220 bestp->zscore = zscore;
1221 bestp->escore = rst.escore;
1222 bestp->segnum = rst.segnum;
1223 bestp->seglen = rst.seglen;
1224 bestp->lseek = lmark;
1225 bestp->cont = ocont+1;
1226 bestp->m_file_p = m_file_p;
1228 bestp->n1tot_p=n1tot_cur;
1230 memcpy(bestp->libstr,libstr,MAX_UID);
1232 bestp->nsfnum = nsfnum;
1233 if ((bestp->sfnum[0]=sfnum[0])>0 &&
1234 (bestp->sfnum[1]=sfnum[1])>0 &&
1235 (bestp->sfnum[2]=sfnum[2])>0 &&
1236 (bestp->sfnum[3]=sfnum[3])>0 &&
1237 (bestp->sfnum[4]=sfnum[4])>0 &&
1238 (bestp->sfnum[5]=sfnum[5])>0 &&
1239 (bestp->sfnum[6]=sfnum[6])>0 &&
1240 (bestp->sfnum[7]=sfnum[7])>0 &&
1241 (bestp->sfnum[8]=sfnum[8])>0 &&
1242 (bestp->sfnum[9]=sfnum[9])>0) ;
1247 bestp = bestp_arr[nbest++];
1248 bestp->score[0] = rst.score[0];
1249 bestp->score[1] = rst.score[1];
1250 bestp->score[2] = rst.score[2];
1251 bestp->comp = rst.comp;
1253 bestp->zscore = zscore;
1254 bestp->escore = rst.escore;
1255 bestp->segnum = rst.segnum;
1256 bestp->seglen = rst.seglen;
1257 bestp->lseek = lmark;
1259 bestp->m_file_p = m_file_p;
1261 bestp->n1tot_p=n1tot_cur;
1263 memcpy(bestp->libstr,libstr,MAX_UID);
1274 memcpy(aa1,&aa1[n1-m_msg.loff],m_msg.loff);
1276 memcpy(aa1,aa1s,m_msg.loff);
1278 aa1ptr= &aa1[m_msg.loff];
1279 loffset += n1 - m_msg.loff;
1285 if (ocont) *n1tot_cur = n1tot_v;
1291 } /* end while((n1=getlib())) */
1292 } /* end iln=1..nln */
1297 /* check last buffers for any results */
1298 put_rbuf_done(max_workers,cur_buf,max_work_buf);
1300 for (i=0; i < num_reader_bufs; i++) {
1301 reader_buf_readp = (reader_buf_readp+1)%(max_work_buf);
1302 if (reader_buf[reader_buf_readp]->buf_cnt > 0 &&
1303 reader_buf[reader_buf_readp]->have_results) {
1304 save_best(reader_buf[reader_buf_readp],m_msg,pst,fdata,m_msg.qsfnum,
1305 &m_msg.hist, &m_msg.pstat_void
1316 if (m_msg.db.entries >= 200) {fprintf(stderr," Done!\n");}
1319 m_msg.nbr_seq = m_msg.db.entries;
1320 get_param(&pst, gstring2,gstring3);
1322 /* *************************** */
1323 /* analyze the last results */
1324 /* *************************** */
1328 if (!stats_done && nstats > 0) {
1330 pst.zsflag_f = process_hist(stats,nstats,m_msg,pst,&m_msg.hist,
1331 &m_msg.pstat_void,stats_done);
1332 if (m_msg.pstat_void != NULL) {
1334 for (i = 0; i < nbest; i++) {
1335 bestp_arr[i]->zscore =
1336 (*find_zp)(bestp_arr[i]->score[pst.score_ix],
1337 bestp_arr[i]->escore, bestp_arr[i]->n1,
1338 bestp_arr[i]->comp, m_msg.pstat_void);
1342 else pst.zsflag = -1;
1346 if (pst.zsflag < 10) pst.zsflag += 10;
1347 pst.zsflag_f = process_hist(stats,nstats,m_msg,pst,
1348 &m_msg.hist, &m_msg.pstat_void,0);
1350 for (i = 0; i < nbest; i++) {
1351 bestp_arr[i]->zscore = (*find_zp)(bestp_arr[i]->score[pst.score_ix],
1352 bestp_arr[i]->escore, bestp_arr[i]->n1,
1353 bestp_arr[i]->comp, m_msg.pstat_void);
1357 if (pst.zdb_size <= 1) pst.zdb_size = m_msg.db.entries;
1360 /* before I call last_calc/showbest/showalign, I need init_work() to
1361 get an f_str. This duplicates some code above, which is used in
1362 the non-threaded version
1366 init_work(aa0[0],m_msg.n0,&pst,&f_str[0]);
1368 f_str[5] = f_str[4] = f_str[3] = f_str[2] = f_str[1] = f_str[0];
1370 if (m_msg.qframe == 2) {
1371 if ((aa0[1]=(unsigned char *)calloc((size_t)m_msg.n0+2,
1372 sizeof(unsigned char)))==NULL) {
1373 fprintf(stderr," cannot allocate aa0[1][%d] for alignments\n",
1378 memcpy(aa0[1],aa0[0],m_msg.n0+1);
1379 revcomp(aa0[1],m_msg.n0,&pst.c_nt[0]);
1380 init_work(aa0[1],m_msg.n0,&pst,&f_str[1]);
1383 /* I also need a "real" aa1 */
1384 aa1 = buf_list[0].start;
1386 /* for PRSS - I need the original second (non-shuffled) sequence */
1387 memcpy(aa1,aa1s,n1s+1);
1392 /* now we have one set of scaled scores for in bestp_arr -
1393 for FASTS/F, we need to do some additional processing */
1395 if (!m_msg.qshuffle) {
1396 last_stats(aa0[0], m_msg.n0, stats,nstats, bestp_arr,nbest,
1397 m_msg, pst, &m_msg.hist, &m_msg.pstat_void);
1400 last_stats(aa0[0], m_msg.n0,
1401 qstats,nqstats, bestp_arr,nbest, m_msg, pst,
1402 &m_msg.hist, &m_msg.pstat_void);
1405 /* here is a contradiction: if pst.zsflag < 0, then m_msg.pstat_void
1406 should be NULL; if it is not, then process_hist() has been called */
1407 if (pst.zsflag < 0 && m_msg.pstat_void != NULL) pst.zsflag = 1;
1409 if (m_msg.last_calc_flg) {
1410 /* last_calc may need coefficients from last_stats() */
1411 nbest = last_calc(aa0, aa1, maxn, bestp_arr, nbest, m_msg, &pst,
1412 f_str, m_msg.pstat_void);
1415 scale_scores(bestp_arr,nbest,m_msg.db,pst,m_msg.pstat_void);
1417 get_param(&pst, gstring2,gstring3);
1420 /* gettitle(m_msg.lname,m_msg.ltitle,sizeof(m_msg.ltitle)); */
1421 printf("%.50s - %s %d %s%s\n vs %.60s - %s shuffled sequence\n",
1422 m_msg.tname, m_msg.qtitle,m_msg.n0, m_msg.sqnam,
1423 (m_msg.revcomp ? " (reverse complement)" : "\0"),
1424 m_msg.lname,m_msg.ltitle);
1427 prhist (stdout, m_msg, pst, m_msg.hist, nstats, m_msg.db, gstring2);
1430 printf (" Scan time: ");
1431 ptime(stdout,tscan-tprev);
1434 ttscan += tscan-tprev;
1439 printf("Enter filename for results [%s]: ", m_msg.outfile);
1444 if (!m_msg.quiet && fgets(rline,sizeof(rline),stdin)==NULL) goto end_l;
1445 if ((bp=strchr(rline,'\n'))!=NULL) *bp = '\0';
1446 if (rline[0]!='\0') strncpy(m_msg.outfile,rline,sizeof(m_msg.outfile));
1447 if (m_msg.outfile[0]!='\0') {
1448 if ((outfd=fopen(m_msg.outfile,"w"))==NULL) {
1449 fprintf(stderr," could not open %s\n",m_msg.outfile);
1450 if (!m_msg.quiet) goto l3;
1455 fputs(argv_line,outfd);
1458 fputs(iprompt0,outfd);
1459 fprintf(outfd," %s%s\n",verstr,refstr);
1461 fprintf(outfd," %s%s, %d %s\n vs %s %s\n",
1462 qlabel, (m_msg.revcomp ? "-" : "\0"), m_msg.n0,
1463 m_msg.sqnam, m_msg.ltitle, lib_label);
1465 prhist(outfd,m_msg,pst,m_msg.hist, nstats, m_msg.db, gstring2);
1469 if (m_msg.markx & MX_HTML) {
1470 fputs("</pre>\n<p>\n<hr>\n<p>\n",outfd);
1473 /* code from p2_complib.c to pre-calculate -m 9 alignment info -
1474 requires -q with -m 9 */
1476 if (m_msg.quiet || m_msg.markx & MX_M9SUMM) {
1478 /* to determine how many sequences to re-align (either for
1479 do_opt() or calc_id() we need to modify m_msg.mshow to get
1480 the correct number of alignments */
1482 if (m_msg.mshow_flg != 1 && pst.zsflag >= 0) {
1483 for (i=0; i<nbest && bestp_arr[i]->escore< m_msg.e_cut; i++) {}
1488 if (m_msg.mshow <= 0) { /* no results to display */
1489 fprintf(outfd,"!! No sequences with E() < %f\n",m_msg.e_cut);
1497 memcpy(aa1,aa1s,n1s);
1502 showbest (stdout, aa0, aa1, maxn, bestp_arr, nbest, qtt.entries, &m_msg, pst,
1503 m_msg.db, gstring2, f_str);
1505 if (outfd != stdout) {
1506 t_quiet = m_msg.quiet;
1507 m_msg.quiet = -1; /* should guarantee 1..nbest shown */
1508 showbest (outfd, aa0, aa1, maxn, bestp_arr, nbest, qtt.entries, &m_msg, pst,
1509 m_msg.db, gstring2, f_str);
1510 m_msg.quiet = t_quiet;
1513 if (m_msg.nshow > 0) {
1516 printf(" Display alignments also? (y/n) [n] "); fflush(stdout);
1517 if (fgets(rline,sizeof(rline),stdin)==NULL) goto end_l;
1521 if (toupper((int)rline[0])=='Y') {
1523 printf(" number of alignments [%d]? ",m_msg.nshow);
1525 if (fgets(rline,sizeof(rline),stdin)==NULL) goto end_l;
1526 if (rline[0]!=0) sscanf(rline,"%d",&m_msg.nshow);
1527 m_msg.ashow=m_msg.nshow;
1530 if (m_msg.markx & (MX_AMAP+ MX_HTML + MX_M9SUMM)) {
1531 fprintf(outfd,"\n>>>%s%s, %d %s vs %s library\n",
1532 qlabel,(m_msg.revcomp ? "_rev":"\0"), m_msg.n0,
1533 m_msg.sqnam,m_msg.lname);
1536 if (m_msg.markx & MX_M10FORM) {
1537 fprintf(outfd,"\n>>>%s%s, %d %s vs %s library\n",
1538 qlabel,(m_msg.revcomp ? "-":"\0"), m_msg.n0, m_msg.sqnam,
1540 fprintf(outfd,"; pg_name: %s\n",argv[0]);
1541 fprintf(outfd,"; pg_ver: %s\n",mp_verstr);
1542 fprintf(outfd,"; pg_argv:");
1543 for (i=0; i<argc; i++)
1544 fprintf(outfd," %s",argv[i]);
1546 fputs(gstring3,outfd);
1547 fputs(hstring1,outfd);
1551 showalign (outfd, aa0, aa1, maxn, bestp_arr, nbest, qtt.entries,
1552 m_msg, pst, gstring2, f_str);
1554 if (pst.sw_flag > 0 || (!m_msg.quiet && m_msg.nshow>0)) {
1555 showalign (outfd, aa0, aa1, maxn, bestp_arr, nbest, qtt.entries,
1556 m_msg, pst, gstring2, f_str);
1565 #if defined(COMP_THR) && defined(COMP_MLIB)
1566 for (i=0; i<max_work_buf; i++) {
1567 buf_list[i].buf_cnt=0;
1568 buf_list[i].have_results=0;
1571 num_worker_bufs = 0;
1572 num_reader_bufs = max_work_buf;
1574 worker_buf_workp = 0;
1575 worker_buf_readp = 0;
1576 reader_buf_workp = 0;
1577 reader_buf_readp = 0;
1579 start_thread = 1; /* stop thread from starting again */
1582 /* clean up alignment encodings */
1583 for (i=0; i < m_msg.nshow; i++) {
1584 if (bestp_arr[i]->have_ares) {
1585 free(bestp_arr[i]->a_res.res);
1586 bestp_arr[i]->a_res.res = NULL;
1587 bestp_arr[i]->have_ares = 0;
1591 if (m_msg.qframe == 2) free(aa0[1]-1);
1594 if (f_str[1]!=f_str[0]) {
1595 close_work (aa0[1], m_msg.n0, &pst, &f_str[1]);
1597 close_work (aa0[0], m_msg.n0, &pst, &f_str[0]);
1600 if (m_msg.qshuffle) close_work (aa0s, m_msg.n0, &pst, &qf_str);
1603 free_pam2p(pst.pam2p[0]);
1604 free_pam2p(pst.pam2p[1]);
1608 for (iln=0; iln < m_msg.nln; iln++) {
1609 if (m_msg.lb_mfd[iln]!=NULL) closelib(m_msg.lb_mfd[iln]);
1612 tddone = time(NULL);
1617 fprintf(fdata,"/** %s **/\n",gstring2);
1618 fprintf(fdata,"%3ld%-50s\n",qtt.entries-1,m_msg.qtitle);
1623 ttdisp += tdone-tscan;
1625 maxn = m_msg.max_tot;
1627 QGETLIB (aa0[0], MAXTST, m_msg.qtitle, sizeof(m_msg.qtitle),
1628 &qseek, &qlcont,q_file_p,&m_msg.sq0off);
1629 if (m_msg.n0 <= 0) break;
1630 if ((bp=strchr(m_msg.qtitle,' '))!=NULL) *bp='\0';
1631 strncpy(qlabel, m_msg.qtitle,sizeof(qlabel));
1632 if (bp != NULL) *bp=' ';
1633 qlabel[sizeof(qlabel)-1]='\0';
1635 if (m_msg.ann_flg) {
1636 m_msg.n0 = ann_scan(aa0[0],m_msg.n0,&m_msg,m_msg.qdnaseq);
1639 if (m_msg.term_code && m_msg.qdnaseq==SEQT_PROT &&
1640 aa0[0][m_msg.n0-1]!=m_msg.term_code) {
1641 aa0[0][m_msg.n0++]=m_msg.term_code;
1645 #if defined(SW_ALTIVEC) || defined(SW_SSE2)
1646 /* for ALTIVEC, must pad with 15 NULL's */
1647 for (id=0; id<SEQ_PAD; id++) {aa0[0][m_msg.n0+id]=0;}
1651 m_msg.nqsfnum = nsfnum;
1652 for (i=0; i <= nsfnum & i<10; i++) m_msg.qsfnum[i] = sfnum[i];
1653 m_msg.nqsfnum_n = nsfnum_n;
1654 for (i=0; i <= nsfnum_n & i<10; i++) m_msg.qsfnum_n[i] = sfnum_n[i];
1658 if (m_msg.markx & MX_M10FORM)
1659 fprintf(outfd,">>><<<\n");
1662 if ( m_msg.markx & MX_HTML) fputs("<p><pre>\n",outfd);
1663 printsum(outfd, m_msg.db);
1664 if ( m_msg.markx & MX_HTML) fputs("</pre>\n",outfd);
1666 if (m_msg.markx & MX_HTML) fprintf(outfd,"</body>\n</html>\n");
1668 if (outfd!=stdout) printsum(stdout,m_msg.db);
1671 } /* End of main program */
1674 printsum(FILE *fd, struct db_str ntt)
1677 char tstr1[26], tstr2[26];
1679 strncpy(tstr1,ctime(&tdstart),sizeof(tstr1));
1680 strncpy(tstr2,ctime(&tddone),sizeof(tstr1));
1681 tstr1[24]=tstr2[24]='\0';
1683 /* Print timing to output file as well */
1684 fprintf(fd, "\n\n%ld residues in %ld query sequences\n", qtt.length, qtt.entries);
1686 fprintf(fd, "%ld residues in %ld library sequences\n", ntt.length, ntt.entries);
1688 db_tt = (double)ntt.carry*(double)LONG_MAX + (double)ntt.length;
1689 fprintf(fd, "%.0f residues in %ld library sequences\n", db_tt, ntt.entries);
1693 fprintf(fd," Scomplib [%s]\n start: %s done: %s\n",mp_verstr,tstr1,tstr2);
1695 fprintf(fd," Tcomplib [%s] (%d proc)\n start: %s done: %s\n", mp_verstr,
1696 max_workers,tstr1,tstr2);
1699 fprintf(fd," Scan time: ");
1700 ptime(fd, tscan - tprev);
1701 fprintf (fd," Display time: ");
1702 ptime (fd, tdone - tscan);
1704 fprintf(fd," Total Scan time: ");
1706 fprintf (fd," Total Display time: ");
1710 fprintf (fd, "\nFunction used was %s [%s]\n", prog_func,verstr);
1717 db.entries = db.length = db.carry = 0;
1719 tddone = time(NULL);
1721 printf(" /*** interrupted ***/\n");
1722 if (outfd!=stdout) fprintf(outfd,"/*** interrupted ***/\n");
1723 fprintf(stderr,"/*** interrupted ***/\n");
1725 printsum(stdout,db);
1726 if (outfd!=stdout) printsum(outfd,db);
1732 void save_best(struct buf_head *cur_buf, struct mngmsg m_msg, struct pstruct pst,
1733 FILE *fdata, int *qsfnum, struct hist_str *histp,
1736 , int *s_score, int *s_n1
1743 struct buf_str *p_rbuf, *cur_buf_p;
1744 int i, t_best, t_rbest, t_qrbest, tm_best, t_n1, sc_ix;
1745 double e_score, tm_escore, t_rescore, t_qrescore;
1748 sc_ix = pst.score_ix;
1750 cur_buf_p = cur_buf->buf;
1752 t_best = t_rbest = t_qrbest = -1;
1753 tm_escore = t_rescore = t_qrescore = FLT_MAX;
1755 while (cur_buf->buf_cnt--) { /* count down the number of results */
1756 p_rbuf = cur_buf_p++; /* step through the results buffer */
1758 i_score = p_rbuf->rst.score[sc_ix];
1759 e_score = p_rbuf->rst.escore;
1761 /* need to look for frame 0 if TFASTA, then save stats at frame 6 */
1764 "%-12s %5d %6d %d %.5f %.5f %4d %4d %4d %g %d %d %8ld\n",
1767 sfn_cmp(qsfnum,p_rbuf->sfnum),
1771 p_rbuf->n1,p_rbuf->frame,p_rbuf->rst.comp,p_rbuf->rst.H,
1772 p_rbuf->rst.score[0],p_rbuf->rst.score[1],p_rbuf->rst.score[2],
1773 p_rbuf->rst.escore, p_rbuf->rst.segnum, p_rbuf->rst.seglen, p_rbuf->lseek);
1777 if (p_rbuf->lseek==0) {
1778 s_score[0] = p_rbuf->rst.score[0];
1779 s_score[1] = p_rbuf->rst.score[1];
1780 s_score[2] = p_rbuf->rst.score[2];
1783 bestp = bestp_arr[nbest++];
1784 bestp->score[0] = s_score[0];
1785 bestp->score[1] = s_score[1];
1786 bestp->score[2] = s_score[2];
1788 bestp->escore = p_rbuf->rst.escore;
1789 bestp->segnum = p_rbuf->rst.segnum;
1790 bestp->seglen = p_rbuf->rst.seglen;
1791 bestp->zscore = zscore;
1792 bestp->lseek = p_rbuf->lseek;
1793 bestp->m_file_p = p_rbuf->m_file_p;
1794 memcpy(bestp->libstr,p_rbuf->libstr,MAX_UID);
1795 bestp->n1tot_p = p_rbuf->n1tot_p;
1796 bestp->frame = p_rbuf->frame;
1803 if (i_score > t_best) tm_best = t_best = i_score;
1804 if (e_score < tm_escore) tm_escore = e_score;
1806 if (m_msg.qshuffle) {
1807 if (p_rbuf->qr_score > t_qrbest)
1808 t_qrbest = p_rbuf->qr_score;
1809 if (p_rbuf->qr_escore < t_qrescore)
1810 t_qrescore = p_rbuf->qr_escore;
1812 if (p_rbuf->frame == m_msg.nitt1 && nqstats < m_msg.shuff_max) {
1813 qstats[nqstats].n1 = p_rbuf->n1; /* save the best score */
1814 qstats[nqstats].comp = p_rbuf->rst.comp;
1815 qstats[nqstats].H = p_rbuf->rst.H;
1816 qstats[nqstats].escore = t_qrescore;
1817 qstats[nqstats++].score = t_qrbest;
1818 t_qrbest = -1; /* reset t_qrbest, t_qrescore */
1819 t_qrescore = FLT_MAX;
1823 if (pst.zsflag >= 10 && p_rbuf->r_score > t_rbest) {
1824 t_rbest = p_rbuf->r_score;
1825 t_rescore = p_rbuf->r_escore;
1828 /* statistics done for best score of set */
1831 if (p_rbuf->frame == m_msg.nitt1) {
1832 if (nstats < MAXSTATS ) {
1833 stats[nstats].n1 = t_n1;
1834 stats[nstats].comp = p_rbuf->rst.comp;
1835 stats[nstats].H = p_rbuf->rst.H;
1836 if (pst.zsflag >= 10) {
1838 tm_escore = t_rescore;
1840 t_rescore = FLT_MAX;
1842 stats[nstats].escore = tm_escore;
1843 stats[nstats++].score = tm_best;
1845 tm_escore = FLT_MAX;
1847 else if (pst.zsflag > 0) {
1849 pst.zsflag_f = process_hist(stats,nstats,m_msg,pst,
1850 histp, pstat_voidp,0);
1853 for (i=0; i<MAXBEST; i++) {
1854 bestp_arr[i]->zscore =
1855 (*find_zp)(bestp_arr[i]->score[pst.score_ix],
1856 bestp_arr[i]->escore, bestp_arr[i]->n1,
1857 bestp_arr[i]->comp, *pstat_voidp);
1862 if (!m_msg.escore_flg) {
1863 jstats = nrand(++kstats);
1864 if (jstats < MAXSTATS) {
1865 stats[jstats].n1 = t_n1;
1866 stats[jstats].comp = p_rbuf->rst.comp;
1867 stats[jstats].H = p_rbuf->rst.H;
1868 if (pst.zsflag >= 10) {
1871 stats[jstats].score = tm_best;
1879 /* best saved for every score */
1882 zscore=(*find_zp)(i_score, e_score, p_rbuf->n1,(double)p_rbuf->rst.comp,
1885 if (p_rbuf->frame == m_msg.nitt1) {
1886 addhistz((*find_zp)(t_best, tm_escore, p_rbuf->n1, (double) p_rbuf->rst.comp,
1887 *pstat_voidp), histp);
1888 t_best = t_rbest = -1;
1889 tm_escore = t_rescore = FLT_MAX;
1892 else zscore = (double) i_score;
1895 if (zscore > zbestcut) {
1896 if (nbest >= MAXBEST) {
1897 bestfull = nbest-MAXBEST/4;
1898 selectbestz(bestp_arr,bestfull-1,nbest);
1899 zbestcut = bestp_arr[bestfull-1]->zscore;
1902 bestp = bestp_arr[nbest++];
1903 bestp->score[0] = p_rbuf->rst.score[0];
1904 bestp->score[1] = p_rbuf->rst.score[1];
1905 bestp->score[2] = p_rbuf->rst.score[2];
1906 bestp->comp = (double) p_rbuf->rst.comp;
1907 bestp->H = (double) p_rbuf->rst.H;
1908 bestp->escore = p_rbuf->rst.escore;
1909 bestp->segnum = p_rbuf->rst.segnum;
1910 bestp->seglen = p_rbuf->rst.seglen;
1911 bestp->zscore = zscore;
1912 bestp->lseek = p_rbuf->lseek;
1913 memcpy(bestp->libstr,p_rbuf->libstr,MAX_UID);
1914 bestp->cont = p_rbuf->cont; /* not cont+1 because incremented already */
1915 bestp->m_file_p = p_rbuf->m_file_p;
1916 bestp->n1 = p_rbuf->n1;
1917 bestp->n1tot_p = p_rbuf->n1tot_p;
1918 bestp->frame = p_rbuf->frame;
1919 bestp->nsfnum = p_rbuf->nsfnum;
1921 if ((bestp->sfnum[0] = p_rbuf->sfnum[0])>0 &&
1922 (bestp->sfnum[1] = p_rbuf->sfnum[1])>0 &&
1923 (bestp->sfnum[2] = p_rbuf->sfnum[2])>0 &&
1924 (bestp->sfnum[3] = p_rbuf->sfnum[3])>0 &&
1925 (bestp->sfnum[4] = p_rbuf->sfnum[4])>0 &&
1926 (bestp->sfnum[5] = p_rbuf->sfnum[5])>0 &&
1927 (bestp->sfnum[6] = p_rbuf->sfnum[6])>0 &&
1928 (bestp->sfnum[7] = p_rbuf->sfnum[7])>0 &&
1929 (bestp->sfnum[8] = p_rbuf->sfnum[8])>0 &&
1930 (bestp->sfnum[9] = p_rbuf->sfnum[9])>0) ;