1 /* copyright (c) 1996, 1997, 1998, 1999 William R. Pearson and the
4 /* $Name: fa_34_26_5 $ - $Id: p2_complib.c,v 1.96 2007/01/12 20:15:16 wrp Exp $ */
7 * pcomplib.c : Parallel library search
9 * #define FIRSTNODE 0/1 (in msg.h) can be used to reserve one node
10 * for collecting results
12 * Parallel specific options (from doinit.c):
13 * -J # jump to query #
14 * -I self-comparison, do (N choose 2) comparisons
15 * -T # number of workers
18 /* This version is modifed to read all files, query and database,
19 through the manager process. Workers will now receive their
20 database from the manager, rather than reading it themselves. This
21 cuts down considerably on NFS traffic, simplifies searches of
22 multiple files, and allows use of clusters of slave nodes that do
26 /* modified 5-November-2004 to ensure 15 byte (SEQ_PAD) NULL
29 modified 12-December-2006 to ensure n0>0 before SEQ_PAD padding.
43 #include <sys/types.h>
49 char *mp_verstr="34.26, January 12, 2007 PVM";
54 char *mp_verstr="34.26, January 12, 2007 MPI";
69 char workerpgm[MAX_FN];
70 char managepgm[MAX_FN];
76 /********************************/
77 /* global variable declarations */
78 /********************************/
79 char gstring2[MAX_STR]; /* string for label */
80 char gstring3[MAX_STR]; /* string for label */
81 char hstring1[MAX_STR];
83 int nsfnum; /* number of superfamily numbers */
84 int sfnum[10]; /* superfamily number from types 0 and 5 */
88 /********************************/
89 /* extern variable declarations */
90 /********************************/
91 extern char *prog_func; /* function label */
92 extern char *verstr, *iprompt0, *iprompt1, *iprompt2, *refstr;
94 /********************************/
95 /*extern function declarations */
96 /********************************/
98 void libchoice(char *lname, int, struct mngmsg *); /* lib_sel.c */
99 void libselect(char *lname, struct mngmsg *); /* lib_sel.c */
101 extern void closelib();
102 /* check for DNA sequence (nxgetaa.c) */
103 extern int scanseq(unsigned char *seq, int n, char *str);
104 extern void re_ascii(int *qascii, int *sascii);
105 extern int recode(unsigned char *seq, int n, int *qascii, int nsq);
107 /* 1d to 2d pam (initxx.c) */
108 extern void initpam2 (struct pstruct *ppst);
109 /* initialize environment (doinit.c) */
110 extern void h_init (struct pstruct *ppst, struct mngmsg *, char *);
111 extern void s_abort (char *p, char *p1);
112 extern void query_parm (struct mngmsg *m_msp, struct pstruct *ppst);
113 extern void last_init (struct mngmsg *, struct pstruct *, int);
115 extern void initenv (int argc, char **argv, struct mngmsg *m_msg,
116 struct pstruct *ppst, unsigned char **aa0);
118 /* print hist, summaries, timing information */
119 void prhist(FILE *, struct mngmsg, struct pstruct, struct hist_str, int nstats, struct db_str, char *);
120 void printsum(FILE *);
121 extern void ptime (FILE *, time_t);
123 /* reset parameters if DNA sequence (initxx.c) */
124 extern void resetp (struct mngmsg *, struct pstruct *);
126 /* read a sequence (nmgetlib.c) */
127 struct lmf_str *openlib(char *, int, int *, int, struct lmf_str *);
129 #define QGETLIB (q_file_p->getlib)
130 #define LGETLIB (l_file_p->getlib)
132 /* these functions are in scaleswn.c */
133 extern int process_hist(struct stat_str *sptr, int nstat,
134 struct mngmsg m_msg, struct pstruct pst,
135 struct hist_str *hist, void **pstat_void, int);
136 extern double zs_to_E(double zs, int n1, int isdna, long, struct db_str ntt);
137 extern double (*find_zp)(int score, double escore, int length, double comp, void *);
138 void addhistz(double zscore, struct hist_str *); /* scaleswn.c */
139 void last_stats(const unsigned char *aa0, int n0,
140 struct stat_str *sptr, int nstats,
141 struct beststr **bestp_arr, int nbest,
142 struct mngmsg m_msg, struct pstruct pst,
143 struct hist_str *histp, void *rs);
145 void selectbestz(struct beststr **, int, int);
146 void sortbest(struct beststr **, int, int);
148 void showbest (FILE *fp, struct beststr **bptr, int nbest,
149 int qlib, struct mngmsg *m_msg, struct pstruct pst,
150 struct db_str ntt, char *gstring2);
152 void showalign (FILE *fp,
153 struct beststr **bptr, int nbest,int qlib, struct mngmsg m_msg,
154 struct pstruct pst, char *gstring2);
158 int pinums[MAXNOD],hosttid;
160 struct pvmhostinfo *hostp;
163 FILE *outfd; /* Output file */
165 extern time_t s_time (); /* fetches time for timing */
167 /* this information is global for fsigint() */
168 time_t tstart, tscan, tprev, tdone; /* Timing */
169 time_t tdstart, tddone, time();
170 int max_nodes, nnodes; /* number of nodes */
171 int node_map[MAXWRKR], node_id[MAXWRKR];
172 int tot_speed,h_speed;
173 int qlib = 0; /* number of sequences scanned */
174 struct db_str ntt, qtt;
176 extern int max_workers, worker_1, worker_n;
177 int wlsn [MAXWRKR + 1]; /* number of library sequences in worker */
178 int clsn [MAXWRKR + 1]; /* number of 1st library sequence in worker */
184 #define WORKERPGM "c34.work"
188 main (int argc, char *argv[])
190 unsigned char *aa00, *aa01, *aa0p0, *aa0p1;
191 unsigned char *aa1, *aa1ptr, *aa1prev;
192 int aa1i, *aa1i_arr; /* integer offset of sequence in buffer */
195 int *n1tot_ptr=NULL, *n1tot_cur;
203 struct lmf_str *q_file_p;
204 struct lmf_str *l_file_p;
206 /* from manage code */
207 struct mngmsg m_msg0, m_msg1; /* Message from host to manager */
208 struct mngmsg *m_msp0, *m_msp1; /* alternating pointers */
209 struct qmng_str qm_msg0, qm_msg1; /* stuff updated for each query */
213 struct qmng_str *qm_msp0, *qm_msp1; /* pointer to stuff updated */
214 int last_msg_b[10]; /* last set of numbers */
215 long curtype = ONETYPE; /* current message type */
217 struct beststr *best, /* array of best scores */
218 **bptr; /* array of pointers */
219 struct comstr bestr[BFR+1]; /* temporary structure array */
220 struct comstr2 bestr2[BFR2+1]; /* temporary structure array */
221 struct a_struct *aln_d_base=NULL; /* alignment info for -m 9 */
222 int qres_bufsize; /* buffer size for results */
223 struct stat_str *stats=NULL, *qstats=NULL;
224 int best_flag = 1; /* bptr[] must be re-initialized */
225 int fast_flag = 0; /* send new sequences before old displayed */
226 int nstats, nqstats, kstats, jstats;
227 int nbest, nres; /* number of best scores */
228 double zbestcut = -BIGNUM; /* z-value cutoff */
229 int lcnt; /* counters */
231 int i, j, k, is, id, iw, ires, naa0 = 0;
233 FILE *fdata=NULL; /* file for full results */
235 struct sql *ldes; /* descriptive lines for all lib sequences */
236 char *bline_buf, *bline_bufp;
237 char *bline_buf_mx; /* buffer for blines */
240 int max_bline_b, bline_inc;
241 int *n1_arr, *m_seqnm_arr;
242 unsigned char *aa1_buf;
244 char tlibstr[11]; /* used only for fdata *.res files */
246 int node, snode, zero; /* Number of nodes */
247 int bufid, numt, tid;
251 int ntbuff, nseq, m_seqnm;
252 int iln, ocont, maxt;
255 int leng; /* leng is length of the descriptive line */
256 fseek_t qseek,lseek; /* seek into library of current sequence */
257 int qlcont,lcont; /* continued sequence */
260 int stats_done =0; /* flag for z-value processing */
261 int tm_best, t_rbest, t_qrbest, t_best, t_n1;
262 double e_score, tm_escore, t_rescore, t_qrescore;
263 double zscore; /* tmp value */
265 char tmp_str[MAX_FN];
266 char pgm_abbr[MAX_SSTR];
269 MPI_Status mpi_status;
274 signal(SIGHUP,SIG_IGN);
275 if (signal(SIGINT,SIG_IGN) != SIG_IGN) signal(SIGINT,fsigint);
276 if (signal(SIGQUIT,SIG_IGN) != SIG_IGN) signal(SIGQUIT,fsigint);
277 /* if (signal(SIGSEGV,SIG_IGN) != SIG_IGN) signal(SIGSEGV,fsigint); */
283 m_msg0.quiet = !isatty(1);
286 /* BFR must be %6 = 0 for TFASTA */
288 fprintf(stderr," BFR size %d not %%6=0 - recompile\n",BFR);
293 MPI_Init(&argc, &argv);
294 MPI_Comm_rank(MPI_COMM_WORLD,&tid);
303 for (i=0; i<argc; i++) {
304 if (strchr(argv[i],' ')) printf(" \"%s\"",argv[i]);
305 else printf(" %s",argv[i]);
310 MPI_Comm_size(MPI_COMM_WORLD,&nnodes);
312 fprintf(stderr," nnodes = %d; no workers available\n",nnodes);
315 else fprintf(stderr," have %d nodes\n",nnodes);
317 tot_speed = nnodes*100;
320 h_init (&pst,&m_msg0, pgm_abbr);
322 initenv (argc, argv, &m_msg0, &pst, &aa00);
325 strncpy (workerpgm, WORKERPGM,sizeof(workerpgm)-1);
326 strncat(workerpgm, pgm_abbr, sizeof(workerpgm)-strlen(workerpgm)-1);
327 workerpgm[sizeof(workerpgm)-1] = '\0';
330 strncpy(q_sqnam,"aa",sizeof(q_sqnam));
332 if (m_msg0.qdnaseq != SEQT_UNK &&
333 (m_msg0.qdnaseq == SEQT_DNA || m_msg0.qdnaseq == SEQT_RNA))
334 strncpy(q_sqnam,"nt",sizeof(q_sqnam));
336 m_msg0.pstat_void = NULL;
337 m_msg0.hist.hist_a = NULL;
339 fprintf (stderr, "Pcomp library processor\n");
340 fprintf (stderr, "Using %s\n", prog_func);
342 tstart = tscan = s_time();
343 tdstart = time(NULL);
347 if ((hosttid=pvm_mytid())<0) {
348 pvm_perror("initialization");
349 fprintf(stderr,"can't initialize %s\n", argv[0]);
354 pvm_config(&nnodes,&narch,&hostp);
355 fprintf(stderr,"nnodes: %d, narch: %d\n",nnodes, narch);
359 pvm_catchout(stderr);
362 /* if (nnodes < 2 ) nnodes = 4; */
363 if (max_workers > 0 && nnodes > max_workers) {
364 nnodes = max_workers+FIRSTNODE;
365 fprintf(stderr," workers reset from %d to %d\n",
366 max_nodes,nnodes-FIRSTNODE);
368 else max_workers = nnodes;
370 strncpy(nodefile,pgmdir,sizeof(nodefile)-1);
371 strncat(nodefile,workerpgm,sizeof(nodefile)-strlen(nodefile)-1);
372 nodefile[sizeof(nodefile)-1] = '\0';
375 /* remap configuration to specific nodes */
376 for (i=FIRSTNODE, j=worker_1; i<nnodes && j<=worker_n; i++,j++)
379 max_workers = i-FIRSTNODE;
380 fprintf(stderr," workers remapped from %d to %d\n",
381 max_nodes,nnodes-FIRSTNODE);
385 for (i=0; i< nnodes; i++) node_map[i]=node_id[i] = i;
388 if (nnodes < max_nodes) {
389 hostp++; /* bump over host name for spawn */
390 rand_nodes(node_map,nnodes,max_nodes-1);
391 for (i=FIRSTNODE; i<nnodes; i++) {
392 numt+=pvm_spawn(nodefile,NULL,PvmTaskHost,hostp[node_map[i]].hi_name,
397 /* i counts through nodes (machines) */
398 /* j counts through processes (multiple processes/node) */
399 /* node map maps the process (virtual node) to a physical node (machine) */
401 for (i=j=FIRSTNODE; i<nnodes && j < MAXWRKR; i++) {
402 n_proc = hostp[node_id[i]].hi_speed%100;
403 if (n_proc == 0) n_proc = 1;
404 if (n_proc > max_workers) n_proc = max_workers;
406 n_tmp =pvm_spawn(nodefile,NULL,PvmTaskHost,hostp[node_id[i]].hi_name,
409 fprintf(stderr," spawn problem: %d\n", pinums[j]);
411 for (k=j; k < j+n_tmp; k++) node_map[k]=node_id[i];
427 for (tot_speed=0,i=FIRSTNODE; i<nnodes; i++) {
429 fprintf(stderr," tids %d %8o\n",i,pinums[i]);
435 h_speed = hostp[node_map[tidtonode(pinums[i])]].hi_speed;
436 if (h_speed <= 0) h_speed = 100;
437 fprintf(stderr," tids %d %8o %s %5d\n",i,pinums[i],
438 hostp[node_map[tidtonode(pinums[i])]].hi_name,
440 tot_speed +=(hostp[node_map[tidtonode(pinums[i])]].hi_speed);
444 strncpy(worknode,nodefile,sizeof(worknode));
445 fprintf (stderr, "%3d worker programs loaded from %s\n",
446 nnodes-FIRSTNODE,worknode);
449 /* need to allocate two aa0 arrays so that the old is saved for alignments */
451 /* Allocate space for the query sequence */
452 if ((aa00 = (unsigned char *) malloc ((MAXTST + SEQ_PAD + 1)* sizeof (char))) == NULL)
453 s_abort ("Unable to allocate query sequence", "");
455 if ((aa01 = (unsigned char *) malloc ((MAXTST + SEQ_PAD + 1) * sizeof (char))) == NULL)
456 s_abort ("Unable to allocate query sequence", "");
458 fputs(iprompt0,stdout);
459 fprintf(stdout," %s%s\n",verstr,refstr);
462 if (m_msg0.tname[0] == '\0') {
463 if (m_msg0.quiet == 1) s_abort("query sequence undefined","");
465 fprintf(stderr, "Pvcomplib [%s]\n",mp_verstr);
466 l1: fputs (iprompt1, stdout);
468 if (fgets (m_msg0.tname, 80, stdin) == NULL)
469 s_abort ("Unable to read query library name","");
470 if ((bp=strchr(m_msg0.tname,'\n'))!=NULL) *bp='\0';
471 if (m_msg0.tname[0] == '\0') goto l1;
474 /* Open query library */
476 openlib(m_msg0.tname, m_msg0.qdnaseq,qascii,!m_msg0.quiet,NULL))==NULL) {
477 s_abort(" cannot open library ",m_msg0.tname);
481 printf ("searching %s library\n",m_msg0.tname);
485 ntt.entries = qtt.entries = 0;
486 ntt.carry = qtt.carry = 0;
487 ntt.length = qtt.length = 0l;
489 /* Fetch first sequence */
491 while (qlib < m_msg0.ql_start) { /* skip through query sequences */
492 pst.n0 = qm_msg0.n0 = m_msg0.n0 =
493 QGETLIB (aa00, MAXTST, q_bline, sizeof(q_bline), &qseek, &qlcont,
494 q_file_p,&m_msg0.sq0off);
496 strncpy(qm_msg0.libstr,q_bline,sizeof(qm_msg0.libstr)-20);
497 qm_msg0.libstr[sizeof(qm_msg0.libstr)-21]='\0';
498 if ((bp=strchr(qm_msg0.libstr,' '))!=NULL) *bp='\0';
500 /* if annotations are included in sequence, remove them */
501 if (m_msg0.ann_flg) {
502 pst.n0 = qm_msg0.n0 = m_msg0.n0 =
503 ann_scan(aa00, m_msg0.n0, &m_msg0, m_msg0.qdnaseq);
505 fprintf(stderr,"m_msp0->/aa0a is: %o/%o\n",&m_msg0,m_msg0.aa0a);
509 if (m_msg0.term_code &&
510 !(m_msg0.qdnaseq == SEQT_DNA || m_msg0.qdnaseq==SEQT_RNA) &&
511 aa00[m_msg0.n0-1]!='*') {
512 aa00[m_msg0.n0++]='*';
514 pst.n0 = qm_msg0.n0 = m_msg0.n0;
517 /* check for subset */
518 if (q_file_p->opt_text[0]!='\0') {
519 if (q_file_p->opt_text[0]=='-') {
520 sstart=0; sscanf(&q_file_p->opt_text[1],"%d",&sstop);
523 sscanf(&q_file_p->opt_text[0],"%d-%d",&sstart,&sstop);
525 if (sstop <= 0 ) sstop = BIGNUM;
527 for (id=0,is=sstart; is<min(m_msg0.n0,sstop); ) aa00[id++]=aa00[is++];
529 pst.n0 = qm_msg0.n0 = m_msg0.n0 = min(m_msg0.n0,sstop)-sstart;
530 if (m_msg0.sq0off==1) m_msg0.sq0off = sstart+1;
536 s_abort ("Unable to fetch sequence from library: ", m_msg0.tname);
541 /* now have correct query sequence - check sequence type and reset */
542 if (m_msg0.qdnaseq == SEQT_UNK) { /* check for DNA sequence */
543 if (m_msg0.n0 > 20 &&
544 (float)scanseq(aa00,m_msg0.n0,"ACGTUNacgtun")/(float)m_msg0.n0>0.85) {
546 m_msg0.qdnaseq = SEQT_DNA;
548 else { /* its protein */
550 m_msg0.qdnaseq = SEQT_PROT;
553 re_ascii(qascii,pascii);
554 init_ascii(pst.ext_sq_set,qascii,m_msg0.qdnaseq);
555 m_msg0.n0 = recode(aa00,m_msg0.n0,qascii,pst.nsqx);
558 /* for ALTIVEC, must pad with 15 NULL's */
559 for (i=0; i<SEQ_PAD+1; i++) {aa00[m_msg0.n0+i]=0;}
561 qtt.length = m_msg0.n0;
564 fprintf(stderr," no sequences found in query library\n");
568 resetp (&m_msg0, &pst);
570 sprintf(tmp_str," %d %s", qm_msg0.n0, q_sqnam);
571 leng = strlen (qm_msg0.libstr);
572 if (leng + strlen(tmp_str) >= sizeof(qm_msg0.libstr))
573 qm_msg0.libstr[sizeof(qm_msg0.libstr)-strlen(tmp_str)-2] = '\0';
574 strncat(&qm_msg0.libstr[0],tmp_str,
575 sizeof(qm_msg0.libstr)-strlen(qm_msg0.libstr)-1);
576 qm_msg0.libstr[sizeof(qm_msg0.libstr)-1]='\0';
578 qm_msg0.seqnm = qlib-1;
582 if (strlen (m_msg0.lname) == 0) {
583 if (m_msg0.quiet == 1) s_abort("library name undefined","");
584 libchoice(m_msg0.lname, sizeof(m_msg0.lname), &m_msg0);
587 libselect(m_msg0.lname, &m_msg0);
589 /* Get additional parameters here */
590 if (!m_msg0.quiet) query_parm (&m_msg0, &pst);
592 last_init(&m_msg0, &pst,nnodes-FIRSTNODE);
593 memcpy(&m_msg1, &m_msg0, sizeof(m_msg0));
595 /* m_msg0.maxn needs to be set to MAXLIB or MAXTRN, depending on the
596 function - max_tot has the MAXTST + (MAXLIB|MAXTRN) */
597 if (m_msg0.maxn <= 0) m_msg0.maxn = m_msg0.max_tot - MAXTST;
599 if (m_msg0.maxn < 2 * m_msg0.dupn) m_msg0.maxn = 5*m_msg0.dupn;
600 pst.maxlen = m_msg0.maxn;
602 m_msg0.loff = m_msg0.dupn;
603 m_msg0.maxt3 = m_msg0.maxn-m_msg0.loff;
606 /* ******************** */
607 /* initial manager code */
608 /* ******************** */
611 if (m_msg0.outfile[0]!='\0') {
612 if ((outfd = fopen(m_msg0.outfile,"w"))==NULL) {
613 fprintf(stderr, "cannot open %s for output\n", m_msg0.outfile);
618 /* Label the output */
619 printf("Query library %s vs %s library\n", m_msg0.tname, m_msg0.lname);
621 /* Allocate space for saved scores */
623 (struct beststr *)malloc((MAXBEST+1)*sizeof(struct beststr)))==NULL)
624 s_abort ("Cannot allocate best struct","");
626 (struct beststr **)malloc((MAXBEST+1)*sizeof(struct beststr *)))==NULL)
627 s_abort ("Cannot allocate bptr","");
629 /* Initialize bptr */
630 for (nbest = 0; nbest < MAXBEST+1; nbest++)
631 bptr[nbest] = &best[nbest];
634 best[-1].score[0]=best[-1].score[1]=best[-1].score[2]=INT_MAX;
635 best[-1].zscore = FLT_MAX;
636 best[-1].escore = FLT_MIN;
640 (struct stat_str *)calloc((size_t)MAXSTATS,sizeof(struct stat_str)))
642 s_abort ("Cannot allocate stats struct","");
645 /* Now open the second library, divide it, send sequences to all workers */
646 /* Set up buffer for reading the library:
648 We will start by using a 2 Mbyte buffer for each worker. For
649 proteins, that means 5,000 sequences of length 400 (average).
650 For DNA, that means 2,000 sequences of length 1000. At the moment,
651 those are good averages.
654 if (max_buf_cnt <= 0) {
655 if (m_msg0.ldnaseq==SEQT_DNA) max_buf_cnt = MAX_NT_BUF;
656 else max_buf_cnt = MAX_AA_BUF;
659 if (m_msg0.ldnaseq==SEQT_DNA) ave_seq_len = AVE_NT_LEN;
660 else ave_seq_len = AVE_AA_LEN;
662 /* however - buffer sizes should be a function of the number of
663 workers so that all the workers are kept busy. Assuming a 10,000
664 entry library is the smallest we want to schedule, then
667 if (max_buf_cnt > 10000/(nnodes-FIRSTNODE))
668 max_buf_cnt = 10000/(2*(nnodes-FIRSTNODE));
670 /* allocate space for sequence buffers */
672 m_msg0.pbuf_siz=max_buf_cnt*ave_seq_len;
673 if (m_msg0.pbuf_siz < 5*m_msg0.maxn)
674 m_msg0.pbuf_siz = 5*m_msg0.maxn;
678 pvm_setopt(PvmRoute,PvmRouteDirect);
680 pvm_initsend(PvmDataRaw);
681 pvm_pkint(&nnodes,1,1);
682 pvm_pkint(pinums,nnodes,1);
683 pvm_pkbyte((char *)&m_msg0,(int)sizeof(m_msg0),1);
684 for (node = FIRSTNODE; node<nnodes; node++)
685 if (pvm_send(pinums[node],STARTTYPE0)<0) {
686 pvm_perror("pvm_send1");
692 for (node = FIRSTNODE; node<nnodes; node++) {
693 MPI_Send(&m_msg0,(int)sizeof(m_msg0),MPI_BYTE,node,STARTTYPE0,
698 /* now send pst, sascii */
700 pvm_initsend(PvmDataRaw);
701 pvm_pkbyte((char *)&pst,(int)sizeof(pst),1);
702 pvm_pkbyte((char *)pascii,(int)sizeof(aascii),1);
704 for (node = FIRSTNODE; node< nnodes; node++)
705 pvm_send(pinums[node],STARTTYPE1);
708 pvm_initsend(PvmDataRaw);
709 pvm_pkint(pam12,m_msg0.pamd1*m_msg0.pamd2,1);
710 for (node = FIRSTNODE; node< nnodes; node++)
711 pvm_send(pinums[node],STARTTYPE2);
714 pvm_initsend(PvmDataRaw);
715 pvm_pkint(pam12x,m_msg0.pamd1*m_msg0.pamd2,1);
716 for (node = FIRSTNODE; node< nnodes; node++)
717 pvm_send(pinums[node],STARTTYPE3);
721 for (node=FIRSTNODE; node < nnodes; node++) {
722 MPI_Send(&pst,(int)sizeof(pst),MPI_BYTE,node,STARTTYPE1,
724 MPI_Send(pascii,(int)sizeof(aascii),MPI_BYTE,node,STARTTYPE1,
726 MPI_Send(pam12,m_msg0.pamd1*m_msg0.pamd2,MPI_INT,node,STARTTYPE2,
728 MPI_Send(pam12x,m_msg0.pamd1*m_msg0.pamd2,MPI_INT,node,STARTTYPE3,
734 (int *)calloc((size_t)(max_buf_cnt+1),sizeof(int)))
736 fprintf(stderr," cannot allocate n1_arr %d\n",max_buf_cnt+1);
737 s_abort(" cannot allocate n1_arr","");
742 (int *)calloc((size_t)(max_buf_cnt+1),sizeof(int)))
744 fprintf(stderr," cannot allocate aa1i_arr %d\n",max_buf_cnt+1);
745 s_abort(" cannot allocate aa1i_arr","");
750 (int *)calloc((size_t)(max_buf_cnt+1),sizeof(int)))
752 fprintf(stderr," cannot allocate m_seqnm_arr %d\n",max_buf_cnt+1);
753 s_abort(" cannot allocate m_seqnm_arr","");
758 (unsigned char *)calloc((size_t)(m_msg0.pbuf_siz),sizeof(unsigned char)))
760 s_abort(" cannot allocate library buffer %d","");
765 /* also allocate space for descriptions. Assume max of 250,000 sequences/
769 /* max_sql is the maxinum number of library sequences that can be stored */
772 if ((ldes=(struct sql *)calloc(max_sql,sizeof(struct sql)))==NULL) {
773 fprintf(stderr," failure to allocate ldes(%d) %ld\n",
774 max_sql,max_sql*sizeof(struct sql));
775 s_abort("cannot allocate ldes","");
779 max_bline_b = MAXSQL * (m_msg0.aln.llen+1)/4;
780 bline_inc = m_msg0.aln.llen;
781 if (m_msg0.markx & MX_M9SUMM) bline_inc += 40;
785 if ((bline_buf=(char *)calloc(max_bline_b,sizeof(char)))!=NULL) break;
789 if (bline_buf == NULL) {
790 fprintf(stderr," failure to allocate bline_buf(%d) %d\n",
791 max_sql,max_bline_b);
792 s_abort(" cannot allocate bline_buf","");
795 bline_bufp = bline_buf;
796 bline_buf_mx = bline_buf+max_bline_b;
798 /* the code for filling the buffers is copied from comp_thr.c */
799 /* the major differences reflect the fact that all library descriptions
800 will be kept in memory, indexed by sequence number.
802 As a result, one buffer is filled by this loop -
803 ldes[] has the descriptive information for every sequence
804 this array could potentially be quite large
807 /* now open the library and start reading */
808 /* get a buffer and fill it up */
811 m_seqnm = 0; /* m_seqnm is the number of this library sequence */
816 /* sqs2_buf[0].aa1 = aa1_buf; */
819 /* iln counts through each library */
820 for (iln = 0; iln < m_msg0.nln; iln++) {
822 openlib(m_msg0.lbnames[iln], m_msg0.ldnaseq,lascii,!m_msg0.quiet,NULL))==NULL) {
823 fprintf(stderr," cannot open library %s\n",m_msg0.lbnames[iln]);
827 printf ("searching %s library\n",m_msg0.lbnames[iln]);
831 n1tot_v = n1tot_cnt = 0;
832 n1tot_ptr = n1tot_cur = NULL;
836 /* read sequence directly into buffer */
837 aa1ptr = aa1; /* = sqs2_buf[0].aa1; */
839 while ((n1= LGETLIB(aa1ptr,maxt,t_bline,sizeof(t_bline),&lseek,&lcont,
840 l_file_p,&l_off))>=0) {
842 /* skip sequences outside range */
843 if (n1 < m_msg0.n1_low || n1 > m_msg0.n1_high) goto loop1;
845 /* add termination code for proteins, if asked */
846 if (m_msg0.term_code && !lcont &&
847 m_msg0.ldnaseq==SEQT_PROT && aa1ptr[n1-1]!=m_msg0.term_code) {
848 aa1ptr[n1++]=m_msg0.term_code;
852 /* check for a continued sequence and provide a pointer to
853 the n1_tot array if lcont || ocont */
855 if (lcont && !ocont) { /* get a new pointer */
856 if (n1tot_cnt <= 0) {
857 if ((n1tot_ptr=calloc(1000,sizeof(int)))==NULL) {
858 fprintf(stderr," cannot allocate n1tot_ptr\n");
861 else {n1tot_cnt=1000;}
864 n1tot_cur = n1tot_ptr++;
867 if (bline_bufp + bline_inc > bline_buf_mx) {
870 if ((bline_buf=(char *)calloc(max_bline_b,sizeof(char)))!=NULL)
872 fprintf(stderr," failure to allocate bline_buf(%d) %d\n",
873 max_sql,max_bline_b);
877 if (bline_buf != NULL) {
878 bline_bufp = bline_buf;
879 bline_buf_mx = bline_buf+max_bline_b;
882 s_abort("cannot allocate bline_buf ","");
887 if (bline_bufp+bline_inc < bline_buf_mx ) {
888 strncpy(bline_bufp,t_bline,bline_inc);
889 ldes[m_seqnm].bline = bline_bufp;
890 bline_bufp[bline_inc]= '\0';
891 bline_bufp += bline_inc+1;
894 fprintf(stderr," bline_buf overrun\n");
897 ntt.entries++; /* inc number of sequences */
898 ntt.length += n1; /* update total library length */
899 if (ntt.length > LONG_MAX) {ntt.length -= LONG_MAX; ntt.carry++;}
902 /* This discovers most reasons for core dumps */
905 if (aa1[i]>pst.nsq) {
907 "%s residue[%d/%d] %d range (%d) lcont/ocont: %d/%d\n%s\n",
908 qm_msg0.libstr,i,n1,aa1[i],pst.nsq,lcont,ocont,aa1ptr+i);
915 /* for ALTIVEC, must pad with 15 NULL's */
916 for (i=0; i<SEQ_PAD+1; i++) {aa1ptr[n1+i]=0;}
918 /* don't count long sequences more than once */
920 n1 += m_msg0.loff; m_msg0.db.entries--; ntt.entries--;
925 desptr = &ldes[m_seqnm];
927 aa1i_arr[nseq] = (int)(aa1-aa1_buf);
928 m_seqnm_arr[nseq] = m_seqnm;
929 desptr->n1 = n1_arr[nseq] = n1;
930 desptr->n1tot_p = n1tot_cur;
931 desptr->lseek = lseek;
932 desptr->loffset = loffset+l_off;
933 desptr->cont = ocont;
935 desptr->nsfnum = nsfnum;
937 if ((desptr->sfnum[0]=sfnum[0])>0 &&
938 (desptr->sfnum[1]=sfnum[1])>0 &&
939 (desptr->sfnum[2]=sfnum[2])>0 &&
940 (desptr->sfnum[3]=sfnum[3])>0 &&
941 (desptr->sfnum[4]=sfnum[4])>0 &&
942 (desptr->sfnum[5]=sfnum[5])>0 &&
943 (desptr->sfnum[6]=sfnum[6])>0 &&
944 (desptr->sfnum[7]=sfnum[7])>0 &&
945 (desptr->sfnum[8]=sfnum[8])>0 &&
946 (desptr->sfnum[9]=sfnum[9])>0) ;
951 if (m_seqnm >= max_sql) {
953 if ((ldes=(struct sql *)realloc(ldes,max_sql*sizeof(struct sql)))
955 fprintf(stderr," failure to realloc ldes(%d) %ld\n",
956 max_sql,max_sql*sizeof(struct sql));
957 s_abort("cannot allocate ldes","");
966 ntbuff += n1+1+SEQ_PAD;
968 /* if the buffer is filled */
969 if (nseq >= max_buf_cnt || ntbuff >= m_msg0.pbuf_siz - m_msg0.maxn) {
970 /* provide filled buffer to workers */
972 pvm_initsend(PvmDataRaw);
973 pvm_pkint(&nseq,1,1);
974 pvm_pkint(&ntbuff,1,1);
975 pvm_pkint(n1_arr,nseq,1);
976 pvm_pkint(aa1i_arr,nseq,1);
977 pvm_pkint(m_seqnm_arr,nseq,1);
978 pvm_send(pinums[node],STARTTYPE4);
980 pvm_initsend(PvmDataRaw);
981 pvm_pkbyte((char *)aa1_buf,ntbuff,1);
982 pvm_send(pinums[node],STARTTYPE5);
985 MPI_Send(&nseq,1,MPI_INT,node,STARTTYPE4, MPI_COMM_WORLD);
986 MPI_Send(&ntbuff,1,MPI_INT,node,STARTTYPE4, MPI_COMM_WORLD);
987 MPI_Send(n1_arr,nseq,MPI_INT,node,STARTTYPE4, MPI_COMM_WORLD);
988 MPI_Send(aa1i_arr,nseq,MPI_INT,node,STARTTYPE4, MPI_COMM_WORLD);
989 MPI_Send(m_seqnm_arr,nseq,MPI_INT,node,STARTTYPE4, MPI_COMM_WORLD);
991 MPI_Send(aa1_buf,ntbuff,MPI_BYTE,node,STARTTYPE5,MPI_COMM_WORLD);
997 if (++node >= nnodes) node = FIRSTNODE;
1002 memcpy(aa1,&aa1prev[n1-m_msg0.loff],m_msg0.loff);
1003 aa1ptr = &aa1[m_msg0.loff];
1005 maxt = m_msg0.maxt3;
1006 loffset += n1 - m_msg0.loff;
1009 if (ocont) *n1tot_cur = n1tot_v;
1020 } /* for (iln < nln) */
1024 pvm_initsend(PvmDataRaw);
1025 pvm_pkint(&nseq,1,1);
1026 pvm_pkint(&ntbuff,1,1);
1027 pvm_pkint(n1_arr,nseq,1);
1028 pvm_pkint(aa1i_arr,nseq,1);
1029 pvm_pkint(m_seqnm_arr,nseq,1);
1030 pvm_send(pinums[node],STARTTYPE4);
1032 pvm_initsend(PvmDataRaw);
1033 pvm_pkbyte((char *)aa1_buf,ntbuff,1);
1034 pvm_send(pinums[node],STARTTYPE5);
1037 MPI_Send(&nseq,1,MPI_INT,node,STARTTYPE4, MPI_COMM_WORLD);
1038 MPI_Send(&ntbuff,1,MPI_INT,node,STARTTYPE4, MPI_COMM_WORLD);
1039 MPI_Send(n1_arr,nseq,MPI_INT,node,STARTTYPE4, MPI_COMM_WORLD);
1040 MPI_Send(aa1i_arr,nseq,MPI_INT,node,STARTTYPE4, MPI_COMM_WORLD);
1041 MPI_Send(m_seqnm_arr,nseq,MPI_INT,node,STARTTYPE4, MPI_COMM_WORLD);
1043 MPI_Send(aa1_buf,ntbuff,MPI_BYTE,node,STARTTYPE5,MPI_COMM_WORLD);
1047 /* fprintf(stderr," all sequences sent\n"); */
1049 if (ntt.entries <= 0) {
1050 s_abort("no reference library sequences found\n","");
1054 for (node=FIRSTNODE; node < nnodes; node++) {
1056 pvm_initsend(PvmDataRaw);
1057 pvm_pkint(&zero,1,1);
1058 pvm_pkint(&zero,1,1);
1059 pvm_pkint(n1_arr,1,1);
1060 pvm_pkint(aa1i_arr,1,1);
1061 pvm_pkint(m_seqnm_arr,1,1);
1062 pvm_send(pinums[node],STARTTYPE4);
1065 MPI_Send(&zero,1,MPI_INT,node,STARTTYPE4, MPI_COMM_WORLD);
1066 MPI_Send(&zero,1,MPI_INT,node,STARTTYPE4, MPI_COMM_WORLD);
1067 MPI_Send(n1_arr,0,MPI_INT,node,STARTTYPE4, MPI_COMM_WORLD);
1068 MPI_Send(aa1i_arr,0,MPI_INT,node,STARTTYPE4, MPI_COMM_WORLD);
1069 MPI_Send(m_seqnm_arr,0,MPI_INT,node,STARTTYPE4, MPI_COMM_WORLD);
1073 for (node = FIRSTNODE; node < nnodes; node++) {
1075 bufid = pvm_recv(-1,STARTTYPE0);
1076 pvm_bufinfo(bufid,NULL,NULL,&tid);
1077 snode = tidtonode(tid);
1078 pvm_upkint(&lcnt,1,1);
1082 MPI_Recv(&lcnt,1,MPI_INT,MPI_ANY_SOURCE,STARTTYPE0,
1083 MPI_COMM_WORLD,&mpi_status);
1084 snode= mpi_status.MPI_SOURCE;
1086 wlsn [snode-FIRSTNODE] = lcnt;
1087 fprintf(stderr," %d sequences at %d\n",lcnt,snode);
1090 /* print out all descriptions */
1092 for (node = FIRSTNODE; node < nnodes; node++)
1093 for (lcnt = 0; lcnt < wlsn[node-FIRSTNODE]; lcnt ++)
1094 printf("%2d:%3d\t%s\n",node,lcnt,ldes[lcnt].bline);
1097 /* Calculate cumulative totals and send to workers for a self search */
1099 clsn [0] = nclib= 0;
1100 for (node = FIRSTNODE; node < nnodes-1; node++) {
1101 /* clsn[] is for the next node */
1102 clsn[node-FIRSTNODE+1] = nclib += wlsn[node-FIRSTNODE];
1106 for (node = FIRSTNODE; node < nnodes; node++) {
1108 pvm_initsend(PvmDataRaw);
1109 pvm_pkint(&clsn[node-FIRSTNODE],1,1);
1110 pvm_send(pinums[node],STARTTYPE1);
1113 MPI_Send(&clsn[node-FIRSTNODE],1,MPI_INT,node,STARTTYPE1,MPI_COMM_WORLD);
1115 fprintf(stderr,"sending lend: %d to worker %d\n",clsn[node-FIRSTNODE],node);
1118 last_msg_b[0] = m_msg0.nbr_seq = m_msg1.nbr_seq = ntt.entries;
1121 /* if BFR is too big for this library, reduce it */
1122 while ( ntt.entries*(m_msg0.nitt1+1)/(2*nnodes) < qres_bufsize) {
1124 if ((qres_bufsize%(m_msg0.nitt1+1))!= 0) {
1125 qres_bufsize *= (m_msg0.nitt1+1);
1128 if (qres_bufsize < 50) break;
1130 last_msg_b[1] = qres_bufsize;
1132 fprintf(stderr," using BFR=%d/%d\n",qres_bufsize,BFR);
1135 pvm_initsend(PvmDataRaw);
1136 pvm_pkint(last_msg_b,2,1);
1137 for (node=FIRSTNODE; node < nnodes; node++)
1138 pvm_send(pinums[node],STARTTYPE0);
1141 for (node=FIRSTNODE; node < nnodes; node++)
1142 MPI_Send(last_msg_b,2,MPI_INT,node,STARTTYPE0,MPI_COMM_WORLD);
1145 tscan = tprev = s_time();
1147 /**************************************
1148 The logic of this section has been simplified to allow multistage
1149 comparison functions to be used and alignments to be generated.
1151 send 1st query to workers
1152 get next query sequence from host (m_msp1)
1153 L1: get results from next-1 search (m_msp0)
1154 sort the results of the next-1 search
1155 (possibly) do additional stages of search
1156 (possibly produce alignments for search
1157 send next query to workers (m_msp1)
1158 display result of next-1 search (m_msp0)
1159 get next query sequence from host (m_msp1)
1162 As a result of the interleaving, there must be two qm_msg structures,
1163 one for the next-1 sequence (which is required for labeling the
1164 output), and one for the next sequence (which is sent to the workers
1165 while the results are being displayed. qm_msp0 and qm_msp1 alternate
1166 between these two structures.
1167 ***************************************/
1170 qm_msp0 points to the older qm_msg
1171 qm_msp1 points to the newer qm_msg
1172 the assignment below goes with curtype==ONETYPE
1180 aa0p0 = aa00; /* aa0p0 is the "old" sequence */
1181 aa0p1 = aa01; /* aa0p1 is the "new" sequence */
1183 last_params(aa00,m_msp0->n0,m_msp0,&pst,qm_msp0);
1185 /* process_hist() is called here to get find_zp(), and some other
1186 structures initialized that would otherwise not be initialized
1187 because z-scores are not being calculated */
1189 if (m_msp0->escore_flg) {
1190 pst.zsflag_f = process_hist(stats,nstats,*m_msp0,pst,
1191 &m_msp0->hist,&m_msp0->pstat_void,0);
1195 if (m_msp0->qshuffle && qstats==NULL) {
1197 (struct stat_str *)calloc(m_msg0.shuff_max+1,sizeof(struct stat_str)))==NULL)
1198 s_abort ("Cannot allocate qstats struct","");
1202 /* Send first query sequence to each worker */
1204 if (m_msg0.dfile[0] && (fdata=fopen(m_msg0.dfile,"w"))!=NULL)
1205 fprintf(fdata,"%3d>%-50s\n",qlib,qm_msp0->libstr);
1208 pvm_initsend(PvmDataRaw);
1209 pvm_pkbyte((char *)qm_msp0,sizeof(qm_msg0),1);
1210 if (qm_msp0->n0 > 0) {
1211 pvm_pkbyte((char *)aa0p0,qm_msp0->n0+1+SEQ_PAD,1);
1212 if (m_msg0.ann_flg) pvm_pkbyte((char *)m_msp0->aa0a,qm_msp0->n0+1,1);
1214 for (node = FIRSTNODE; node < nnodes; node++)
1215 pvm_send(pinums[node],MSEQTYPE);
1218 for (node = FIRSTNODE; node < nnodes; node++) {
1219 MPI_Send(qm_msp0,sizeof(qm_msg0),MPI_BYTE,node,MSEQTYPE,MPI_COMM_WORLD);
1220 if (qm_msp0->n0 > 0) {
1221 MPI_Send(aa0p0,qm_msp0->n0+1+SEQ_PAD,MPI_BYTE,node,
1222 MSEQTYPE1,MPI_COMM_WORLD);
1223 if (m_msg0.ann_flg) {
1224 if (m_msp0->aa0a == NULL) {
1225 fprintf(stderr," m_msp0: %o/%oaa0a is null\n",m_msp0,m_msp0->aa0a);
1227 MPI_Send(m_msp0->aa0a,qm_msp0->n0+1,MPI_BYTE,node, MSEQTYPE2,MPI_COMM_WORLD);
1233 /* Get second query sequence (additional query sequences are read in
1236 m_msp1->n0 = qm_msp1->n0 =
1237 QGETLIB(aa0p1,MAXTST,q_bline, sizeof(q_bline),&qseek, &qlcont,q_file_p,&m_msp1->sq0off);
1238 strncpy(qm_msp1->libstr,q_bline,sizeof(qm_msg0.libstr)-20);
1239 qm_msp1->libstr[sizeof(qm_msg0.libstr)-21]='\0';
1240 if ((bp=strchr(qm_msp1->libstr,' '))!=NULL) *bp='\0';
1242 /* if annotations are included in sequence, remove them */
1243 if (m_msg0.ann_flg) {
1244 m_msp1->n0 = qm_msp1->n0 =
1245 ann_scan(aa0p1,qm_msp1->n0,m_msp1,m_msp1->qdnaseq);
1247 fprintf(stderr,"m_msp1->/aa0a is: %o/%o\n",m_msp1,m_msp1->aa0a);
1251 if (qm_msp1->n0 > 0 && m_msg0.term_code && !qlcont &&
1252 m_msg0.qdnaseq == SEQT_PROT &&
1253 aa0p1[m_msp1->n0-1]!=m_msg0.term_code) {
1254 aa0p1[m_msp1->n0++]=m_msg0.term_code;
1255 aa0p1[m_msp1->n0]=0;
1256 qm_msp1->n0 = m_msp1->n0;
1259 /* for ALTIVEC, must pad with 15 NULL's */
1260 if (m_msp1->n0 > 0) {
1261 for (i=0; i<SEQ_PAD+1; i++) {aa0p1[m_msp1->n0+i]=0;}
1265 qm_msp1->seqnm = qlib;
1267 last_params(aa0p1,m_msp1->n0,m_msp1,&pst,qm_msp1);
1269 sprintf(tmp_str," - %d %s", qm_msp1->n0, q_sqnam);
1270 if (strlen(qm_msp1->libstr) + strlen(tmp_str) >= sizeof(qm_msg0.libstr))
1271 qm_msp1->libstr[sizeof(qm_msg0.libstr)-strlen(tmp_str)-2] = '\0';
1272 strncat(qm_msp1->libstr,tmp_str,
1273 sizeof(qm_msg0.libstr)-strlen(qm_msp1->libstr)-1);
1274 qm_msp1->libstr[sizeof(qm_msg0.libstr)-1]='\0';
1276 naa0 = 0; /* reset node counter */
1278 /* sit in loop and collect results */
1286 bufid = pvm_recv(-1,curtype);
1287 pvm_bufinfo(bufid,NULL,NULL,&tid);
1288 pvm_upkbyte((char *)&bestr[0],sizeof(struct comstr)*(qres_bufsize+1),1);
1289 snode = tidtonode(tid);
1293 MPI_Recv(bestr,sizeof(struct comstr)*(qres_bufsize+1),
1294 MPI_BYTE,MPI_ANY_SOURCE,curtype,MPI_COMM_WORLD,&mpi_status);
1295 snode = mpi_status.MPI_SOURCE;
1298 nres = bestr[qres_bufsize].seqnm & ~FINISHED;
1301 fprintf(stderr,"%d results from %d\n",nres,snode);
1304 if (bestr[qres_bufsize].seqnm&FINISHED) { /* a worker is finished */
1307 /* fast_flag == 1 => send new sequences immediately */
1308 fast_flag = ((m_msp0->stages==1) && !(m_msp0->markx & MX_M9SUMM) &&
1309 (m_msp0->ashow == 0) && (m_msp0->last_calc_flg==0));
1310 /* send a new query sequence if no more processing required */
1313 pvm_initsend(PvmDataRaw);
1314 pvm_pkbyte((char *)qm_msp1,sizeof(qm_msg1),1);
1315 if (qm_msp1->n0 != -1) {
1316 pvm_pkbyte((char *)aa0p1,qm_msp1->n0+1+SEQ_PAD,1);
1317 if (m_msp1->ann_flg) pvm_pkbyte((char *)m_msp1->aa0a,qm_msp1->n0+1,1);
1319 pvm_send(tid,MSEQTYPE);
1322 MPI_Send(qm_msp1,sizeof(qm_msg1),MPI_BYTE,snode,MSEQTYPE,MPI_COMM_WORLD);
1323 if (qm_msp1->n0 != -1) {
1324 MPI_Send(aa0p1,qm_msp1->n0+1+SEQ_PAD,MPI_BYTE,snode,MSEQTYPE1,MPI_COMM_WORLD);
1325 if (m_msp1->ann_flg)
1326 MPI_Send(m_msp1->aa0a,qm_msp1->n0+1,MPI_BYTE,snode,MSEQTYPE2,MPI_COMM_WORLD);
1334 fprintf(stderr," unpacking %d from %d; nbest %d\n",nres,snode,nbest);
1337 /* this section is now more complex because can get groups of
1338 sequence results; e.g. forward and reverse frame */
1340 t_best = t_rbest = t_qrbest = -1;
1341 tm_escore = t_rescore = t_qrescore = FLT_MAX;
1342 for (ires = 0; ires < nres; ires++) {
1343 desptr = &ldes[bestr[ires].m_seqnm];
1345 /* save raw results */
1347 strncpy(tlibstr,desptr->bline,10);
1348 if ((bp=strchr(tlibstr,' '))!=NULL) *bp='\0';
1349 fprintf(fdata,"%-10s\t%4d\t%4d\t%d\t%4d\t%4d\t%4d\t%8ld\n",
1350 tlibstr,desptr->sfnum[0],desptr->n1,bestr[ires].frame,
1351 bestr[ires].score[0],bestr[ires].score[1],bestr[ires].score[2],
1355 i_score = bestr[ires].score[pst.score_ix];
1356 e_score = bestr[ires].escore;
1357 k_comp = bestr[ires].comp;
1358 k_H = bestr[ires].H;
1361 if (i_score > t_best) {tm_best = t_best = i_score;}
1362 if (e_score < tm_escore) tm_escore = e_score;
1364 if (m_msp0->qshuffle) {
1365 if (bestr[ires].qr_score > t_qrbest)
1366 t_qrbest = bestr[ires].qr_score;
1367 if (bestr[ires].qr_escore < t_qrescore)
1368 t_qrescore = bestr[ires].qr_escore;
1370 if (bestr[ires].frame==m_msp0->nitt1 &&
1371 nqstats < m_msp0->shuff_max &&
1372 bestr[ires].qr_score >= 0) {
1373 qstats[nqstats].n1 = t_n1; /* save the best score */
1374 qstats[nqstats].comp = bestr[ires].comp;
1375 qstats[nqstats].H = bestr[ires].H;
1376 qstats[nqstats].escore = t_qrescore;
1377 qstats[nqstats++].score = t_qrbest;
1378 t_qrbest = -1; /* reset t_qrbest, t_qrescore */
1379 t_qrescore = FLT_MAX;
1383 if (pst.zsflag >= 10 && bestr[ires].r_score > t_rbest) {
1384 t_rbest = bestr[ires].r_score;
1385 t_rescore = bestr[ires].r_escore;
1388 if (nstats < MAXSTATS) {
1389 if (bestr[ires].frame == m_msg0.nitt1) {
1390 stats[nstats].n1 = t_n1;
1391 stats[nstats].comp = k_comp;
1392 stats[nstats].H = k_H;
1394 if (pst.zsflag > 10) {
1396 tm_escore = t_rescore;
1398 t_rescore = FLT_MAX;
1400 stats[nstats].escore = tm_escore;
1401 stats[nstats++].score = tm_best;
1402 tm_escore = FLT_MAX;
1406 else if (pst.zsflag >=0) { /* nstats >= MAXSTATS, zsflag >=0 */
1408 pst.n0 = qm_msp0->n0;
1409 pst.zsflag_f = process_hist(stats,nstats,*m_msp0,pst,
1410 &m_msp0->hist, &m_msp0->pstat_void,0);
1413 for (i=0; i<nbest; i++) {
1414 bptr[i]->zscore = (*find_zp)(bptr[i]->score[pst.score_ix],
1415 bptr[i]->escore,bptr[i]->n1,
1416 bptr[i]->comp, m_msp0->pstat_void);
1420 if (!m_msp0->escore_flg) {
1421 jstats = nrand(kstats++);
1422 if (jstats < MAXSTATS) {
1423 stats[jstats].n1 = t_n1; /* save the best score */
1424 stats[jstats].comp = k_comp;
1425 stats[jstats].H = k_H;
1426 if (pst.zsflag >=10) t_best = t_rbest;
1427 stats[jstats].score = t_best;
1434 zscore=(*find_zp)(i_score,e_score,desptr->n1,k_comp,
1435 m_msp0->pstat_void);
1436 if (bestr[ires].frame == m_msg0.nitt1) {
1437 addhistz((*find_zp)(tm_best,tm_escore,t_n1,k_comp,
1438 m_msp0->pstat_void),
1440 t_best = t_rbest = -1;
1444 else zscore = (double) i_score;
1446 if (zscore > zbestcut) {
1447 if (nbest>=MAXBEST) {
1448 selectbestz(bptr, nbest-MAXBEST/4-1, nbest);
1450 zbestcut = bptr[nbest-1]->zscore;
1453 /* if zbestcut == -BIGNUM, bptr[] has not been reinitialized */
1454 else if (best_flag) bptr[nbest]=&best[nbest];
1456 bptr[nbest]->m_seqnm = bestr[ires].m_seqnm ;
1457 bptr[nbest]->seqnm = bestr[ires].seqnm;
1458 bptr[nbest]->score[0] = bestr[ires].score[0];
1459 bptr[nbest]->score[1] = bestr[ires].score[1];
1460 bptr[nbest]->score[2] = bestr[ires].score[2];
1461 bptr[nbest]->escore = bestr[ires].escore;
1462 bptr[nbest]->segnum = bestr[ires].segnum;
1463 bptr[nbest]->seglen = bestr[ires].seglen;
1464 bptr[nbest]->comp = bestr[ires].comp;
1465 bptr[nbest]->H = bestr[ires].H;
1466 bptr[nbest]->zscore = zscore;
1467 bptr[nbest]->wrkr = snode;
1468 bptr[nbest]->desptr = desptr;
1469 bptr[nbest]->lseek = desptr->lseek; /* needed for identifying alternate
1470 strand scores from same sequence */
1471 bptr[nbest]->n1 = desptr->n1;
1472 bptr[nbest]->frame = bestr[ires].frame;
1474 /* this was used when -m 9 info was calculated in 1st scan */
1476 bptr[nbest]->sw_score = bestr[ires].sw_score;
1477 if (bestr[ires].sw_score > -1) {
1479 bptr[nbest]->a_len = bestr[ires].a_len;
1480 bptr[nbest]->percent = bestr[ires].percent;
1481 bptr[nbest]->gpercent = bestr[ires].gpercent;
1482 bptr[nbest]->min0 = bestr[ires].min0;
1483 bptr[nbest]->min1 = bestr[ires].min1;
1484 bptr[nbest]->max0 = bestr[ires].max0;
1485 bptr[nbest]->max1 = bestr[ires].max1;
1486 bptr[nbest]->ngap_q = bestr[ires].ngap_q;
1487 bptr[nbest]->ngap_l = bestr[ires].ngap_l;
1490 bptr[nbest]->percent = -1.0;
1491 bptr[nbest]->min0 = bptr[nbest]->min1 = bptr[nbest]->max0 =
1492 bptr[nbest]->max1 = 0;
1499 if (naa0 < nnodes-FIRSTNODE) continue;
1503 /* get gstring2,3 - algorithm/parameter description */
1505 bufid = pvm_recv(pinums[FIRSTNODE],PARAMTYPE);
1506 pvm_upkbyte(gstring2,sizeof(gstring2),1);
1507 pvm_upkbyte(gstring3,sizeof(gstring3),1);
1511 MPI_Recv(gstring2,sizeof(gstring2),MPI_BYTE,FIRSTNODE,PARAMTYPE,
1512 MPI_COMM_WORLD,&mpi_status);
1513 MPI_Recv(gstring3,sizeof(gstring3),MPI_BYTE,FIRSTNODE,PARAMTYPE,
1514 MPI_COMM_WORLD,&mpi_status);
1517 /* ********************** */
1518 /* analyze the results */
1519 /* ********************** */
1522 if (nbest < 20 || pst.zsflag <= 0) {
1526 pst.n0 = qm_msp0->n0;
1527 pst.zsflag_f = process_hist(stats,nstats,*m_msp0,pst,
1528 &m_msp0->hist, &m_msp0->pstat_void,stats_done);
1530 for (i=0; i<nbest; i++)
1531 bptr[i]->zscore = (*find_zp)(bptr[i]->score[pst.score_ix],
1532 bptr[i]->escore, bptr[i]->n1,
1533 bptr[i]->comp, m_msp0->pstat_void);
1537 m_msp0->db.entries = ntt.entries;
1538 m_msp0->db.length = ntt.length;
1539 m_msp0->db.carry = ntt.carry;
1541 if (pst.zdb_size < 1) pst.zdb_size = ntt.entries;
1543 if (!qm_msp0->qshuffle) {
1544 last_stats(aa0p0, m_msp0->n0,
1545 stats,nstats, bptr,nbest, *m_msp0, pst,
1546 &m_msp0->hist, &m_msp0->pstat_void);
1549 last_stats(aa0p0, m_msp0->n0,
1550 qstats,nqstats, bptr,nbest, *m_msp0, pst,
1551 &m_msp0->hist, &m_msp0->pstat_void);
1554 if (m_msp0->last_calc_flg) {
1555 nbest = last_calc(bptr,nbest, *m_msp0, &pst,qm_msp0,
1556 m_msp0->pstat_void);
1559 sortbeste(bptr,nbest);
1560 scale_scores(bptr,nbest,m_msp0->db,pst,m_msp0->pstat_void);
1562 if (pst.zsflag >= 0 && bptr[0]->escore >= m_msg0.e_cut) goto no_results;
1564 /* else sortorder(bptr,nbest,wlsn,nnodes); */
1566 /* if more than one stage or markx==9, calculate opt scores or do alignment */
1567 /* send results to workers as available */
1569 if (m_msg0.stages > 1 || m_msg0.markx & MX_M9SUMM) {
1571 /* to determine how many sequences to re-align (either for
1572 do_opt() or calc_id() we need to modify m_msg.mshow to get
1573 the correct number of alignments */
1575 if (m_msg0.mshow_flg != 1 && pst.zsflag >= 0) {
1576 for (i=0; i<nbest && bptr[i]->escore< m_msg0.e_cut; i++) {}
1580 /* allocate space for a_struct info */
1581 if (m_msg0.markx & MX_M9SUMM && m_msg0.mshow > 0) {
1582 if ((aln_d_base=(struct a_struct *)
1583 calloc((size_t)m_msg0.mshow,sizeof(struct a_struct)))==NULL) {
1584 fprintf(stderr," cannot allocate a_struct %d\n", m_msg0.mshow);
1588 for (is = 0; is < m_msg0.mshow; is++ ) {
1589 bptr[is]->aln_d = &aln_d_base[is];
1593 do_stage2(bptr,m_msg0.mshow, *m_msp0, DO_OPT_FLG, qm_msp0);
1598 tddone = time(NULL);
1600 /* changed from >> to >>> because qm_msp0->libstr is missing '>' */
1601 fprintf (outfd, "%3d>>>%s\n", qlib,qm_msp0->libstr);
1603 /* make certain that m_msp0->n0, libstr are current */
1604 m_msp0->n0 = qm_msp0->n0;
1605 /* strncpy(m_msp0->libstr,qm_msp0->libstr,sizeof(m_msg0.libstr)); */
1607 prhist (outfd,*m_msp0,pst,m_msp0->hist,nstats,m_msp0->db,gstring2);
1609 if (bptr[0]->escore < m_msg0.e_cut) {
1611 showbest (outfd, bptr, nbest, qlib, m_msp0,pst,ntt,gstring2);
1613 if (m_msg0.markx & MX_M9SUMM) {
1614 fprintf(outfd,"\n>>>%s#%d %s%s, %d %s vs %s library\n",
1615 m_msg0.tname,qlib,qm_msp0->libstr,
1616 (m_msg0.revcomp ? "-":"\0"), qm_msp0->n0, m_msg0.sqnam,
1619 else if (m_msg0.markx & MX_M10FORM) {
1620 if ((bp=strchr(qm_msp0->libstr,' '))!=NULL) *bp = '\0';
1621 fprintf(outfd,"\n>>>%s#%d %s%s, %d %s vs %s library\n",
1622 m_msg0.tname,qlib,qm_msp0->libstr,
1623 (m_msg0.revcomp ? "-":"\0"), qm_msp0->n0, m_msg0.sqnam,
1625 if (bp!=NULL) *bp=' ';
1626 fprintf(outfd,"; mp_name: %s\n",argv[0]);
1627 fprintf(outfd,"; mp_ver: %s\n",mp_verstr);
1628 fprintf(outfd,"; mp_argv:");
1629 for (i=0; i<argc; i++)
1630 fprintf(outfd," %s",argv[i]);
1632 fputs(gstring3,outfd);
1633 fputs(hstring1,outfd);
1636 /* ashow is -1 if not set, -d 0 indicates no alignments, > 0 if set */
1637 /* if ashow is -1, m_msg.nshow (set by e_cut above) sets limit
1640 if (m_msp0->ashow != 0) {
1641 /* showalign needs m_msp->qtitle, so fill it in */
1642 strncpy(m_msp0->qtitle,qm_msp0->libstr,MAX_FN-1);
1643 m_msp0->qtitle[MAX_FN-1]='\0';
1644 showalign (outfd, bptr, nbest, qlib, *m_msp0, pst, gstring2);
1648 if (m_msg0.markx & (MX_M9SUMM + MX_M10FORM)) {
1649 fprintf(outfd,"\n>>>%s#%d %s%s, %d %s vs %s library\n",
1650 m_msg0.tname,qlib,qm_msp0->libstr,(m_msg0.revcomp ? "-":"\0"), qm_msg0.n0, m_msg0.sqnam,
1652 fprintf(outfd,">>>!!! No sequences with E() < %f\n",m_msg0.e_cut);
1654 else fprintf(outfd,"!! No sequences with E() < %f\n",m_msg0.e_cut);
1657 if (! (m_msg0.markx & (MX_M9SUMM + MX_M10FORM))) {
1658 fprintf(outfd,"/** search time: ");
1659 ptime(outfd,tdone-tprev);
1660 fprintf(outfd," **/\n");
1663 else if (m_msg0.markx & MX_M9SUMM) {
1664 if (aln_d_base != NULL) {
1665 free((void *)aln_d_base);
1668 fprintf(outfd,">>>***\n");
1669 fprintf(outfd,"/** %s **/\n",gstring2);
1670 fprintf(outfd,"/** %s **/\n",m_msp0->hist.stat_info);
1671 fprintf(outfd,">>><<<\n");
1673 else if (m_msg0.markx & MX_M10FORM) {
1674 fprintf(outfd,">>><<<\n");
1678 /* *********************** */
1679 /* end of analysis/display */
1680 /* *********************** */
1683 /* *********************** */
1684 /* start the next search */
1685 /* *********************** */
1687 if (fdata) { /* label the results file */
1688 fprintf(fdata,"/** %s **/\n",gstring2);
1689 fprintf(fdata,"%3d>%-50s\n",qlib-1,qm_msp1->libstr);
1693 if (m_msp1->escore_flg) { /* re-initialize some stats stuff before search */
1694 pst.zsflag_f = process_hist(stats,nstats,*m_msp1,pst,
1695 &m_msp1->hist,&m_msp1->pstat_void,0);
1698 else stats_done = 0;
1700 /* set up qstats if necessary - different queries have different qshuffle */
1701 if (m_msp1->qshuffle && qstats==NULL) {
1703 (struct stat_str *)calloc(m_msg0.shuff_max+1,sizeof(struct stat_str)))==NULL)
1704 s_abort ("Cannot allocate qstats struct","");
1707 nqstats = nstats = 0;
1709 /* send new qm_msp, sequence */
1712 pvm_initsend(PvmDataRaw);
1713 pvm_pkbyte((char *)qm_msp1,sizeof(qm_msg1),1);
1714 if (qm_msp1->n0 != -1) {
1715 pvm_pkbyte((char *)aa0p1,qm_msp1->n0+1+SEQ_PAD,1);
1716 if (m_msp1->ann_flg) {
1717 pvm_pkbyte((char *)m_msp1->aa0a,qm_msp1->n0+1,1);
1720 for (node = FIRSTNODE; node < nnodes; node++)
1721 pvm_send(pinums[node],MSEQTYPE);
1724 for (node=FIRSTNODE; node < nnodes; node++) {
1725 MPI_Send(qm_msp1,sizeof(qm_msg1),MPI_BYTE,node,MSEQTYPE,
1727 if (qm_msp1->n0 != -1) {
1728 MPI_Send(aa0p1,qm_msp1->n0+1+SEQ_PAD,MPI_BYTE,node,MSEQTYPE1,MPI_COMM_WORLD);
1729 if (m_msp1->ann_flg)
1730 MPI_Send(m_msp1->aa0a,qm_msp1->n0+1,MPI_BYTE,snode,MSEQTYPE2,MPI_COMM_WORLD);
1737 if (qm_msp1->n0 != -1) {
1739 qtt.length += qm_msp1->n0;
1743 /* ******************************** */
1744 /* flip m_msg, qm_msg, aa0 pointers */
1745 /* ******************************** */
1751 if (curtype == ONETYPE) {
1771 /* **********************************************************/
1772 /* all library sequences are done get next library sequence */
1773 /* **********************************************************/
1775 m_msp1->n0 = qm_msp1->n0 =
1776 QGETLIB(aa0p1,MAXTST,q_bline, sizeof(q_bline),&qseek, &qlcont,q_file_p,&m_msp1->sq0off);
1777 strncpy(qm_msp1->libstr,q_bline,sizeof(qm_msg0.libstr)-20);
1778 qm_msp1->libstr[sizeof(qm_msg0.libstr)-21]='\0';
1780 if ((qlib+1) >= m_msg0.ql_stop) { qm_msp1->n0 = m_msp1->n0 = -1;}
1782 if (qm_msp1->n0 > 0 && m_msg0.term_code && !qlcont &&
1783 m_msg0.qdnaseq==SEQT_PROT &&
1784 aa0p1[m_msp1->n0-1]!=m_msg0.term_code) {
1785 aa0p1[m_msp1->n0++]=m_msg0.term_code;
1786 aa0p1[m_msp1->n0]=0;
1787 qm_msp1->n0 = m_msp1->n0;
1790 /* for ALTIVEC, must pad with 15 NULL's */
1791 if (m_msg0.n0 > 0) {
1792 for (i=0; i<SEQ_PAD+1; i++) {aa00[m_msg0.n0+i]=0;}
1797 leng = strlen (qm_msp1->libstr);
1798 sprintf (&(qm_msp1->libstr[leng]), " %d %s", qm_msp1->n0, q_sqnam);
1800 sprintf(tmp_str," %d %s", qm_msp1->n0, q_sqnam);
1801 if (strlen(qm_msp1->libstr) + strlen(tmp_str) >= sizeof(qm_msg0.libstr))
1802 qm_msp1->libstr[sizeof(qm_msg0.libstr)-strlen(tmp_str)-2] = '\0';
1803 strncat(qm_msp1->libstr,tmp_str,
1804 sizeof(qm_msg0.libstr)-strlen(qm_msp1->libstr)-1);
1805 qm_msp1->libstr[sizeof(qm_msg0.libstr)-1]='\0';
1807 qm_msp1->seqnm = qlib;
1809 last_params(aa0p1,m_msp1->n0,m_msp1,&pst,qm_msp1);
1813 /* ******************** */
1814 /* end of library while */
1815 /* ******************** */
1819 if (m_msg0.markx & (MX_M9SUMM + MX_M10FORM)) fputs(">>>///\n",outfd);
1821 if (outfd!=stdout) printsum(stdout);
1831 } /* End of main program */
1837 char tstr1[26], tstr2[26];
1839 strncpy(tstr1,ctime(&tdstart),sizeof(tstr1));
1840 strncpy(tstr2,ctime(&tddone),sizeof(tstr1));
1841 tstr1[24]=tstr2[24]='\0';
1843 /* Print timing to output file as well */
1845 fprintf(fd, "\n%ld residues in %d query sequences\n", qtt.length, qtt.entries);
1848 db_tt = (double)qtt.carry*(double)LONG_MAX + (double)qtt.length;
1849 fprintf(fd, "\n%.0g residues in %d query sequences\n", db_tt, qtt.entries);
1853 fprintf(fd, "%ld residues in %ld library sequences\n", ntt.length, ntt.entries);
1856 db_tt = (double)ntt.carry*(double)LONG_MAX + (double)ntt.length;
1857 fprintf(fd, "%.6f residues in %ld library sequences\n", db_tt, ntt.entries);
1860 fprintf(fd," %d processors (%d workers) were used\n",
1861 nnodes+-FIRSTNODE+1,nnodes-FIRSTNODE);
1862 fprintf(fd," Pvcomplib [%s]\n start: %s done: %s\n",mp_verstr,tstr1,tstr2);
1863 fprintf(fd," Loading time: ");
1864 ptime(fd, tscan - tstart);
1865 fprintf (fd," Scan time: ");
1866 ptime (fd, tdone - tscan);
1868 fprintf (fd, "\nFunction used was %s [%s]\n", prog_func,verstr);
1876 tddone = time(NULL);
1878 if (outfd!=stdout) fprintf(outfd,"/*** interrupted ***/\n");
1879 fprintf(stderr,"/*** interrupted ***/\n");
1882 if (outfd!=stdout) printsum(outfd);
1885 for (i=FIRSTNODE; i<nnodes; i++) pvm_kill(pinums[i]);
1889 MPI_Abort(MPI_COMM_WORLD,1);