2 /* copyright (c) 1996, 1997, 1998, 1999 William R. Pearson and the
5 /* $Name: fa_34_26_5 $ - $Id: mshowalign.c,v 1.43 2007/01/08 15:38:46 wrp Exp $ */
7 /* mshowalign.c - show sequence alignments in pvcomplib */
10 this is a merged version of showalign.c that works properly with
11 both the comp_lib (serial, threaded) and PCOMPLIB parallel versions
14 In the serial and current threaded versions of the programs,
15 showalign gets a list of high scoring sequences and must
16 re_getlib() the sequence, do_walign(), and then calculate the
19 In the PCOMPLIB parallel versions, the worker programs do the
20 aligning, so showalign() must send them the appropriate messages to
21 have the alignment done, and then collect the alignment results
50 /* used to position the library sequence for re_getlib - also gets
52 #define RANLIB (m_fptr->ranlib)
54 extern struct lmf_str *
55 re_openlib(struct lmf_str *, int outtty);
58 re_getlib(unsigned char *aa1, int maxn, int maxt,
59 int loff, int cont, int term_code,
60 long *loffset, long *l_off,
61 struct lmf_str *m_fptr);
63 #include "drop_func.h"
68 extern void cal_coord(int n0, int n1, long sq0off, long loffset,
69 struct a_struct *aln);
71 void initseq(char **, char **, char **, char **, int);
72 void freeseq(char **, char **, char **, char **);
74 void do_show(FILE *fp, int n0, int n1, int score,
75 char *name0, char *name1, int nml,
76 struct mngmsg m_msg, struct pstruct pst,
77 char *seqc0, char *seqc0a, char *seqc1, char *seqca, int nc,
78 float percent, float gpercent, int lc,
79 struct a_struct *aln, long loffset);
81 extern void discons(FILE *fd, struct mngmsg m_msg, struct pstruct pst,
82 char *seqc0, char *seqc0a, char *seqc1, char *seqca,
84 int n0, int n1, char *name0, char *name1, int nml,
88 extern void disgraph(FILE *fd, int n0, int n1,
89 float percent, int score,
90 int min0, int min1, int max0, int max1, long sq0off,
91 char *name0, char *name1, int nml, int llen, int markx);
93 extern double zs_to_bit(double, int, int);
96 do_url1(FILE *, struct mngmsg, struct pstruct, char *, int,
97 struct a_struct , long);
103 static char l_name[200]; /* link name */
106 #define BBP_INFO(info) bbp->desptr->info
108 #define BBP_INFO(info) bbp->info
111 /* this version does not check for m_msg.e_cut because nshow/nbest has
112 already been set to limit on e_cut */
114 void showalign (FILE *fp,
116 unsigned char **aa0, unsigned char *aa1, int maxn,
118 struct beststr **bptr, int nbest, int qlib,
119 struct mngmsg m_msg, struct pstruct pst, char *gstring2
127 char bline[2048], *bl_ptr, *bp, fmt[40];
130 char name0[80], name0s[80], name1[200];
131 int istart = 0, istop, i = 0, ib, nml;
135 float percent, gpercent;
136 char *seqc0, *seqc0a, *seqc1, *seqca;
139 struct stage2_str liblist;
140 struct qmng_str qm_msg;
143 MPI_Status mpi_status;
147 struct lmf_str *m_fptr;
152 /* this function has its own copy of qm_msg, so we must fill it
154 qm_msg.n0 = m_msg.n0;
155 strncpy(qm_msg.libstr,m_msg.qtitle,sizeof(qm_msg.libstr));
158 /* set the name0,1 label length */
159 if (m_msg.markx & MX_M10FORM) nml = 12;
160 else nml = m_msg.nmlen;
162 if (strlen(m_msg.qtitle) > 0) {
163 if (m_msg.qtitle[0]=='>') strncpy(name0s,&m_msg.qtitle[1],sizeof(name0s));
164 else strncpy(name0s,m_msg.qtitle,sizeof(name0s));
167 strncpy(name0s,m_msg.tname,sizeof(name0s));
169 name0s[sizeof(name0s)-1]='\0';
171 if ((bp=strchr(name0s,' '))!=NULL) *bp='\0';
173 if (m_msg.revcomp) name0[nml-1]='-';
175 l_llen = m_msg.aln.llen;
176 if ((m_msg.markx & MX_M9SUMM) && m_msg.show_code != SHOW_CODE_ID) {
178 if (l_llen > 200) l_llen=200;
181 sprintf(fmt,"%s%%-%ds (%%d %s)\n",A_MARK,l_llen-5,m_msg.sqnam);
183 if (!(m_msg.markx&MX_M10FORM)) fprintf(fp,"\n");
185 if (m_msg.ashow < 0) m_msg.ashow = m_msg.nshow;
186 istart = 0; istop = min(min(nbest,m_msg.ashow),m_msg.nshow);
188 for (ib=istart; ib<istop; ib++) {
192 if (BBP_INFO(nsfnum) > 0 && sfn_cmp(m_msg.qsfnum,BBP_INFO(sfnum))) {
193 istop = min(istop+1,nbest);
197 if (bbp->score[0] <= 0) break;
199 if (m_msg.quiet==1 && pst.zsflag>=0
200 && bbp->escore < m_msg.e_low) continue;
203 /* get the alignment and score by re-aligning */
205 if ((m_fptr=re_openlib(bbp->m_file_p,!m_msg.quiet))==NULL)
208 /* get the description - do not "edit" it yet */
210 if (!(m_msg.markx & MX_M10FORM)){
211 if (m_msg.long_info) {tmp_len = sizeof(bline)-1;}
212 else {tmp_len = l_llen-5;}
213 RANLIB(bline,tmp_len,bbp->lseek,bbp->libstr,bbp->m_file_p);
217 RANLIB(bline,sizeof(bline),bbp->lseek,bbp->libstr,bbp->m_file_p);
218 bline[sizeof(bline)-1]='\0';
221 n1 = re_getlib(aa1,maxn,m_msg.maxt3,m_msg.loff,bbp->cont,m_msg.term_code,
222 &loffset,&l_off,bbp->m_file_p);
225 fprintf(stderr," library sequence: %s lengths differ: %d != %d\n",
227 fprintf(stderr, "offset is: %lld\n",bbp->lseek);
231 if (!bbp->have_ares) {
233 do_walign(aa0[bbp->frame],m_msg.n0, aa1, n1, bbp->frame, &pst,
234 f_str[bbp->frame], &bbp->a_res, &t_have_ares);
237 pre_cons(aa1,n1,bbp->frame,f_str[bbp->frame]);
240 aln_func_vals(bbp->frame, &m_msg.aln);
242 #else /* PCOMPLIB - get the alignment information from a worker */
244 /* we have a sequence that we need an alignment for -
245 send a message to the appropriate worker to produce an alignment
246 qm_msg.slist == 1 -> one alignment
247 qm_msg.s_func == DO_ALIGN_FLG -> use the alignment function
248 send mngmsg (MSEQTYPE)
249 then send number of sequence to be aligned
253 qm_msg.s_func = DO_ALIGN_FLG;
255 liblist.seqnm = bbp->seqnm;
256 liblist.frame = bbp->frame;
258 pvm_initsend(PvmDataRaw);
259 pvm_pkbyte((char *)&qm_msg,sizeof(struct qmng_str),1);
260 pvm_send(pinums[bbp->wrkr],MSEQTYPE);
262 pvm_initsend(PvmDataRaw);
263 pvm_pkbyte((char *)&liblist,sizeof(struct stage2_str),1);
264 pvm_send(pinums[bbp->wrkr],LISTTYPE);
267 MPI_Send(&qm_msg,sizeof(struct qmng_str),MPI_BYTE,bbp->wrkr,
268 MSEQTYPE,MPI_COMM_WORLD);
269 MPI_Send(&liblist,sizeof(struct stage2_str),MPI_BYTE,bbp->wrkr,
270 LISTTYPE,MPI_COMM_WORLD);
272 /* information should be sent */
273 /* pick up description */
274 strncpy(bline,bbp->desptr->bline,l_llen-5);
275 bline[l_llen-5]='\0';
276 #endif /* PCOMPLIB */
278 if (strlen(bline)==0) {
280 strncpy(&bline[1],m_msg.lname,l_llen-5);
281 bline[l_llen-5]='\0';
283 /* re-format bline */
284 while ((bp=strchr(bline,'\n'))!=NULL) *bp=' ';
285 if (m_msg.long_info) {
286 tmp_len = strlen(bline);
288 if (!(m_msg.markx & MX_M10FORM)) while (tmp_len > l_llen) {
289 for (i=l_llen; i>10; i--)
290 if (bl_ptr[i]==' ') {
298 bline[sizeof(bline)-1]='\0';
301 n1tot = (BBP_INFO(n1tot_p)) ? *BBP_INFO(n1tot_p) : bbp->n1;
303 strncpy(name1,bline,sizeof(name1));
305 if ((!m_msg.markx & MX_M10FORM)) name1[nml]='\0';
306 if ((bp = strchr(name1,' '))!=NULL) *bp = '\0';
308 /* l_name is used to build an HTML link from the bestscore line to
309 the alignment. It can also be used to discriminate multiple hits
310 from the same long sequence. Text must match that in showbest.c */
312 strncpy(name1,bline,sizeof(name1));
313 name1[sizeof(name1)-1]='\0';
314 if ((bp = strchr(name1,' '))!=NULL) *bp = '\0';
315 strncpy(l_name,name1,sizeof(l_name));
316 l_name[sizeof(l_name)-1]='\0';
317 if ((bp=strchr(&l_name[3],'|'))!=NULL) *bp='\0';
318 if (m_msg.nframe > 2) sprintf(&l_name[strlen(l_name)],"_%d",bbp->frame+1);
319 else if (m_msg.qframe >= 0 && bbp->frame == 1)
320 strncat(l_name,"_r",sizeof(l_name));
321 if (bbp->cont-1 > 0) {
322 sprintf(tmp_str,":%d",bbp->cont-1);
323 strncat(l_name,tmp_str,sizeof(l_name)-strlen(l_name));
326 if (!(m_msg.markx & MX_M10FORM)) name1[nml]='\0';
328 /* print out score information; */
330 if (m_msg.markx & MX_HTML ) {
331 fprintf (fp,"<A name=%s>\n<tt><pre>\n",l_name);
333 strncpy(name0,name0s,nml);
336 if (pst.zsflag%10 == 6) {
337 sprintf(info_str," comp: %.5f H: %.5f",bbp->comp,bbp->H);
339 else info_str[0]='\0';
341 if ((m_msg.markx & MX_ATYPE)!=7 && !(m_msg.markx & MX_M10FORM)) {
342 fprintf (fp, fmt,bp=bline,n1tot);
343 if (m_msg.nframe > 2)
344 fprintf (fp, "Frame: %d",bbp->frame+1);
345 else if (m_msg.nframe > 1)
346 fprintf (fp, "Frame: %c",(bbp->frame? 'r': 'f'));
347 else if (m_msg.qframe >= 0 && bbp->frame > 0 ) {
348 fputs("rev-comp",fp);
354 fprintf (fp, " %s: %3d", m_msg.alab[0],bbp->score[0]);
356 fprintf (fp, " %s: %3d", m_msg.alab[1],bbp->score[1]);
358 fprintf (fp, " %s: %3d", m_msg.alab[2],bbp->score[2]);
359 fprintf(fp,"%s",info_str);
361 fprintf (fp, " Z-score: %4.1f bits: %3.1f E(): %4.2g",
362 bbp->zscore,zs_to_bit(bbp->zscore,m_msg.n0,bbp->n1),bbp->escore);
365 else if (m_msg.markx & MX_M10FORM) {
366 fprintf(fp,">>%s\n",bline);
367 if (m_msg.qframe > -1) {
368 if (m_msg.nframe > 2) {
369 fprintf(fp,"; %s_frame: %d\n",m_msg.f_id0,bbp->frame+1);
372 fprintf(fp,"; %s_frame: %c\n",m_msg.f_id0,(bbp->frame > 0? 'r':'f'));
375 fprintf (fp, "; %s_%s: %3d\n", m_msg.f_id0,m_msg.alab[0],bbp->score[0]);
377 fprintf (fp,"; %s_%s: %3d\n", m_msg.f_id0,m_msg.alab[1],bbp->score[1]);
379 fprintf (fp,"; %s_%s: %3d\n", m_msg.f_id0,m_msg.alab[2],bbp->score[2]);
380 if (info_str[0]) fprintf(fp,"; %s_info: %s\n",m_msg.f_id0,info_str);
382 fprintf (fp,"; %s_z-score: %4.1f\n; %s_bits: %3.1f\n; %s_expect: %6.2g\n",
383 m_msg.f_id0,bbp->zscore,
384 m_msg.f_id0,zs_to_bit(bbp->zscore,m_msg.n0,bbp->n1),
385 m_msg.f_id0,bbp->escore);
390 /* get the sw_score, alignment information, get seqc0, seqc1 */
393 /* get alignment lengths, percents */
394 pvm_recv(pinums[bbp->wrkr],ALN1TYPE);
397 pvm_upkint(&maxc,1,1);
399 pvm_upkfloat(&percent,1,1);
400 pvm_upkfloat(&gpercent,1,1);
402 pvm_upkint(&bbp->sw_score,1,1);
403 pvm_upkbyte((char *)&m_msg.aln,sizeof(struct a_struct),1);
405 initseq(&seqc0, &seqc0a, &seqc1, &seqca, maxc);
407 pvm_recv(pinums[bbp->wrkr],ALN2TYPE);
408 pvm_upkbyte(seqc0,maxc,1);
409 if (m_msg.ann_flg) pvm_upkbyte(seqc0a,maxc,1);
410 pvm_upkbyte(seqc1,maxc,1);
411 pvm_upkbyte(seqca,maxc,1);
414 MPI_Recv(int_msg_b,4,MPI_INT,bbp->wrkr,ALN1TYPE,MPI_COMM_WORLD,
419 bbp->sw_score = int_msg_b[3];
420 MPI_Recv(&percent,1,MPI_FLOAT,bbp->wrkr,ALN2TYPE,MPI_COMM_WORLD,
422 MPI_Recv(&gpercent,1,MPI_FLOAT,bbp->wrkr,ALN2TYPE,MPI_COMM_WORLD,
424 MPI_Recv(&m_msg.aln,sizeof(struct a_struct),MPI_BYTE,
425 bbp->wrkr,ALN3TYPE,MPI_COMM_WORLD,&mpi_status);
427 initseq(&seqc0, &seqc0a, &seqc1, &seqca, maxc);
428 MPI_Recv(seqc0,maxc,MPI_BYTE,bbp->wrkr,ALN2TYPE,MPI_COMM_WORLD,&mpi_status);
430 MPI_Recv(seqc0a,maxc,MPI_BYTE,bbp->wrkr,ALN2TYPE,MPI_COMM_WORLD,&mpi_status);
431 MPI_Recv(seqc1,maxc,MPI_BYTE,bbp->wrkr,ALN3TYPE,MPI_COMM_WORLD,&mpi_status);
432 MPI_Recv(seqca,maxc,MPI_BYTE,bbp->wrkr,ALN3TYPE,MPI_COMM_WORLD,&mpi_status);
435 /* l_off is the coordinate of the first residue */
437 /* loffset is the offset of the aa1 in the full sequence */
438 loffset = bbp->desptr->loffset-l_off;
440 #else /* not PCOMPLIB */
442 /* estimate space for alignment consensus */
443 if (m_msg.aln.showall==1) {
444 maxc = bbp->a_res.nres + max(bbp->a_res.min0,bbp->a_res.min1)+
445 max((m_msg.n0-bbp->a_res.max0),(n1-bbp->a_res.max1))+4;
448 maxc = bbp->a_res.nres + 4*m_msg.aln.llen+4;
451 /* get space to put the sequence alignment consensus */
452 initseq(&seqc0, &seqc0a, &seqc1, &seqca, maxc);
454 /* build consensus from res, nres (done by workers if PCOMPLIB) */
455 if (!m_msg.ann_flg) {
456 nc=calcons(aa0[bbp->frame],m_msg.n0,aa1,n1,
457 &lc,&m_msg.aln, bbp->a_res, pst, seqc0, seqc1, seqca,
459 memset(seqc0a,' ',nc);
463 nc=calcons_a(aa0[bbp->frame],m_msg.aa0a,m_msg.n0,aa1,n1,
464 &lc,&m_msg.aln,bbp->a_res,pst, seqc0, seqc0a,
465 seqc1, seqca, m_msg.ann_arr,f_str[bbp->frame]);
468 /* PCOMPLIB workers return percent, gpercent, so calculate it here */
469 if (lc > 0) percent = (100.0*(float)m_msg.aln.nident)/(float)lc;
470 else percent = -1.00;
471 ngap = m_msg.aln.ngap_q + m_msg.aln.ngap_l;
473 if (lc-ngap> 0) gpercent =(100.0*(float)m_msg.aln.nident)/(float)(lc-ngap);
475 if (lc > 0) gpercent =(100.0*(float)m_msg.aln.nsim)/(float)lc;
477 else gpercent = -1.00;
480 if (max(strlen(seqc0),strlen(seqc1)) > nc) {
481 fprintf(stderr," mshowalign: nc/maxc: %d/%d seqc0/1: %u/%u\n",
482 nc,maxc,strlen(seqc0),strlen(seqc1));
485 /* here PCOMPLIB/comp_lib logic is the same */
488 if (bbp->sw_score < bbp->score[pst.score_ix]) {
489 fprintf(stderr," *** warning - SW score=%d < opt score=%d ***\n",
490 bbp->sw_score, bbp->score[pst.score_ix]);
494 cal_coord(m_msg.n0,bbp->n1,m_msg.sq0off,loffset+l_off-1,&m_msg.aln);
497 if (bbp->a_res.nres > 0)
499 do_show(fp, m_msg.n0, bbp->n1, bbp->sw_score, name0, name1, nml,
500 m_msg, pst, seqc0, seqc0a, seqc1, seqca,
501 nc, percent, gpercent, lc, &m_msg.aln,
504 if (m_msg.markx & MX_HTML) fprintf(fp,"</pre></tt>\n<hr>\n");
507 freeseq(&seqc0,&seqc0a,&seqc1, &seqca);
509 if (fp!=stdout) fprintf(fp,"\n");
512 void do_show(FILE *fp, int n0,int n1, int score,
513 char *name0, char *name1, int nml,
514 struct mngmsg m_msg, struct pstruct pst,
515 char *seqc0, char *seqc0a, char *seqc1, char *seqca, int nc,
516 float percent, float gpercent, int lc,
517 struct a_struct *aln, long loffset)
521 if (m_msg.markx & MX_AMAP && (m_msg.markx & MX_ATYPE)==7)
522 disgraph(fp, n0, n1, percent, score,
523 aln->amin0, aln->amin1, aln->amax0, aln->amax1, m_msg.sq0off,
524 name0, name1, nml, aln->llen, m_msg.markx);
525 else if (m_msg.markx & MX_M10FORM) {
526 if (pst.sw_flag && m_msg.arelv>0)
527 fprintf(fp,"; %s_score: %d\n",m_msg.f_id1,score);
528 fprintf(fp,"; %s_ident: %5.3f\n",m_msg.f_id1,percent/100.0);
530 fprintf(fp,"; %s_gident: %5.3f\n",m_msg.f_id1,gpercent/100.0);
532 fprintf(fp,"; %s_sim: %5.3f\n",m_msg.f_id1,gpercent/100.0);
535 fprintf(fp,"; %s_overlap: %d\n",m_msg.f_id1,lc);
536 discons(fp, m_msg, pst, seqc0, seqc0a, seqc1, seqca, nc,
537 n0, n1, name0, name1, nml, aln, loffset);
540 if (pst.sw_flag) fprintf(fp,"Smith-Waterman score: %d; ",score);
541 else fprintf(fp,"banded Smith-Waterman score: %d; ",score);
543 fprintf(fp," %6.3f%% identity (%6.3f%% ungapped) in %d %s overlap (%ld-%ld:%ld-%ld)\n",
544 percent,gpercent,lc,m_msg.sqnam,aln->d_start0,aln->d_stop0,
545 aln->d_start1,aln->d_stop1);
547 fprintf(fp," %6.3f%% identity (%6.3f%% similar) in %d %s overlap (%ld-%ld:%ld-%ld)\n",
548 percent,gpercent,lc,m_msg.sqnam,aln->d_start0,aln->d_stop0,
549 aln->d_start1,aln->d_stop1);
552 if (m_msg.markx & MX_HTML) {
553 do_url1(fp, m_msg, pst, l_name,n1,*aln,loffset);
556 if (m_msg.markx & MX_AMAP && (m_msg.markx & MX_ATYPE)!=7) {
560 if (m_msg.qdnaseq == SEQT_DNA && m_msg.ldnaseq== SEQT_PROT)
563 disgraph(fp, tmp, n1, percent, score,
564 aln->amin0, aln->amin1,
565 aln->amax0, aln->amax1,
567 name0, name1, nml, aln->llen,m_msg.markx);
570 discons(fp, m_msg, pst, seqc0, seqc0a, seqc1, seqca, nc,
571 n0, n1, name0, name1, nml, aln, loffset);
580 void /* initialize consensus arrays */
581 initseq(char **seqc0, char **seqc0a, char **seqc1, char **seqca, int seqsiz)
583 *seqc0=(char *)calloc((size_t)seqsiz*4,sizeof(char));
585 {fprintf(stderr,"cannot allocate consensus arrays %d\n",seqsiz);
587 *seqc0a=*seqc0 + seqsiz;
588 *seqc1=*seqc0a + seqsiz;
589 *seqca=*seqc1 + seqsiz;
592 void freeseq(char **seqc0, char **seqc0a, char **seqc1, char **seqca)