2 /* copyright (c) 1998, 1999 William R. Pearson and the U. of Virginia */
4 /* $Name: fa_34_26_5 $ - $Id: dropfx.c,v 1.68 2007/04/26 18:37:18 wrp Exp $ */
6 /* implements the fastx algorithm, see:
8 W. R. Pearson, T. Wood, Z. Zhang, A W. Miller (1997) "Comparison of
9 DNA sequences with protein sequences" Genomics 46:24-36
11 see dropnfa.c for better variable descriptions and comments
14 /* 18-Sept-2006 - remove global variables used for alignment */
16 /* 22-June-2006 - correct incorrect alignment coordinates generated
17 after pro_dna() on projected DNA region.
20 /* 9-May-2003 -> 3.46 changed lx_band to use projected protein
21 boundary end. this fixes some addressing issues on MacOSX, and
22 speeds up alignment on very long proteins
36 /* this must be consistent with upam.h */
38 #define NMAP MAXHASH+1
40 /* globals for fasta */
48 static char *verstr="3.5 Sept 2006";
50 static char *verstr="3.5an0 May 2006";
53 struct dstruct /* diagonal structure for saving current run */
55 int score; /* hash score of current match */
56 int start; /* start of current match */
57 int stop; /* end of current match */
58 struct savestr *dmax; /* location in vmax[] where best score data saved */
63 int score; /* pam score with segment optimization */
64 int score0; /* pam score of best single segment */
65 int gscore; /* score from global match */
66 int dp; /* diagonal of match */
67 int start; /* start of match in lib seq */
68 int stop; /* end of match in lib seq */
71 struct swstr { int H, E;};
73 struct bdstr { int CC, DD, CP, DP;};
78 struct sx_s {int C1, C2, C3, I1, I2, I3, flag; };
82 struct savestr vmax[MAXSAV]; /* best matches saved for one sequence */
83 struct savestr *vptr[MAXSAV];
84 struct savestr *lowmax;
87 int hmask; /* hash constants */
88 int *pamh1; /* pam based array */
89 int *pamh2; /* pam based kfact array */
90 int *link, *harr; /* hash arrays */
91 int kshft; /* shift width */
92 int nsav, lowscor; /* number of saved runs, worst saved run */
94 unsigned char *aa0x; /* contains translated codons 111222333*/
95 unsigned char *aa0y; /* contains translated codons 123123123*/
97 unsigned char *aa1x; /* contains translated codons 111222333 */
98 unsigned char *aa1y; /* contains translated codons 123123123 */
108 #include "drop_func.h"
110 static int dmatchx(const unsigned char *aa0, int n0,
111 const unsigned char *aa1, int n1,
112 int hoff, int window,
113 int **pam2, int gdelval, int ggapval, int gshift,
114 struct f_struct *f_str);
116 int shscore(unsigned char *aa0, int n0, int **pam2);
117 int saatran(const unsigned char *ntseq, unsigned char *aaseq, int maxs, int frame);
118 int spam (const unsigned char *aa0, const unsigned char *aa1,
119 struct savestr *dmax, int **pam2,
120 struct f_struct *f_str);
121 int sconn (struct savestr **v, int n,int cgap, int pgap, struct f_struct *f_str);
122 int lx_band(const unsigned char *prot_seq, int len_prot,
123 const unsigned char *dna_prot_seq, int len_dna_prot,
124 int **pam_matrix, int gopen, int gext,
125 int gshift, int start_diag, int width, struct f_struct *f_str);
128 update_code(char *al_str, int al_str_max, int op, int op_cnt, char *op_char);
130 extern void w_abort (char *p, char *p1);
132 /* initialize for fasta */
135 init_work (unsigned char *aa0, int n0,
136 struct pstruct *ppst,
137 struct f_struct **f_arg)
144 struct f_struct *f_str;
145 int ktup; /* word size examined */
146 int fact; /* factor used to scale ktup match value */
147 int kt1; /* ktup-1 */
148 int lkt; /* last ktup - initiall kt1, but can be increased
154 struct swstr *ss, *r_ss;
160 unsigned char *fd, *fs, *aa0x, *aa0y, *aa0s;
164 if (ppst->ext_sq_set) {
165 nsq = ppst->nsqx; ip = 1;
169 nsq = ppst->nsq; ip = 0;
173 f_str = (struct f_struct *)calloc(1,sizeof(struct f_struct));
175 btemp = 2 * ppst->param_u.fa.bestoff / 3 +
176 n0 / ppst->param_u.fa.bestscale +
177 ppst->param_u.fa.bkfact *
178 (ppst->param_u.fa.bktup - ppst->param_u.fa.ktup);
179 btemp = min (btemp, ppst->param_u.fa.bestmax);
180 if (btemp > 3 * n0) btemp = 3 * shscore(aa0,n0,ppst->pam2[0]) / 5;
182 ppst->param_u.fa.cgap = btemp + ppst->param_u.fa.bestoff / 3;
183 if (ppst->param_u.fa.optcut_set != 1)
185 ppst->param_u.fa.optcut = (btemp*5)/4;
187 ppst->param_u.fa.optcut = (btemp*4)/3;
191 ppst->param_u.fa.pgap = ppst->gdelval + ppst->ggapval;
193 ppst->param_u.fa.pgap = ppst->gdelval + 2*ppst->ggapval;
195 pamfact = ppst->param_u.fa.pamfact;
196 ktup = ppst->param_u.fa.ktup;
197 fact = ppst->param_u.fa.scfact * ktup;
201 else if (pamfact == -2)
204 for (i0 = 1, mhv = -1; i0 <=nsq; i0++)
205 if (hsq[i0] < NMAP && hsq[i0] > mhv) mhv = hsq[i0];
208 fprintf (stderr, " maximum hsq <=0 %d\n", mhv);
212 for (f_str->kshft = 0; mhv > 0; mhv /= 2)
218 for (i0 = 0; i0 < ktup; i0++) {
219 hv = hv << f_str->kshft;
222 f_str->hmask = (hmax >> f_str->kshft) - 1;
225 if ((f_str->harr = (int *) calloc (hmax, sizeof (int))) == NULL) {
226 fprintf (stderr, " cannot allocate hash array\n");
229 if ((f_str->pamh1 = (int *) calloc (nsq+1, sizeof (int))) == NULL) {
230 fprintf (stderr, " cannot allocate pamh1 array\n");
233 if ((f_str->pamh2 = (int *) calloc (hmax, sizeof (int))) == NULL) {
234 fprintf (stderr, " cannot allocate pamh2 array\n");
237 if ((f_str->link = (int *) calloc (n0, sizeof (int))) == NULL) {
238 fprintf (stderr, " cannot allocate hash link array");
243 if ((f_str->aa1x =(unsigned char *)calloc((size_t)ppst->maxlen+2,
244 sizeof(unsigned char)))
246 fprintf (stderr, "cannot allocate aa1x array %d\n", ppst->maxlen+2);
251 if ((f_str->aa1y =(unsigned char *)calloc((size_t)ppst->maxlen+2,
252 sizeof(unsigned char)))
254 fprintf (stderr, "cannot allocate aa1y array %d\n", ppst->maxlen+2);
260 if ((aa0x =(unsigned char *)calloc((size_t)maxn0,sizeof(unsigned char)))
262 fprintf (stderr, "cannot allocate aa0x array %d\n", maxn0);
268 if ((aa0y =(unsigned char *)calloc((size_t)maxn0,sizeof(unsigned char)))
270 fprintf (stderr, "cannot allocate aa0y array %d\n", maxn0);
277 for (itemp=0; itemp<3; itemp++) {
278 n0x = saatran(aa0,&aa0x[last_n0],n0,itemp);
280 for (i=0; i<n0x; i++) {
281 fprintf(stderr,"%c",aa[aa0x[last_n0+i]]);
282 if ((i%60)==59) fprintf(stderr,"\n");
284 fprintf(stderr,"\n");
289 /* fprintf(stderr,"\n"); */
291 for (itemp=0, fs=aa0x; itemp <3; itemp++,fs++) {
292 for (fd = &aa0y[itemp]; *fs!=EOSEQ; fd += 3, fs++) *fd = *fs;
296 /* now switch aa0 and aa0x for hashing functions */
297 /* this seems dangerous in threaded code, but only the pointer is changed,
298 not the data itself */
306 for (i0 = 0; i0 < hmax; i0++)
307 f_str->harr[i0] = -1;
308 for (i0 = 0; i0 < n0; i0++)
309 f_str->link[i0] = -1;
311 /* encode the aa0 array */
315 for (i0 = 0; i0 < min(lkt,n0); i0++) {
316 if (hsq[aa0[i0]] >= NMAP) {hv=phv=0; lkt=i0+ktup; continue;}
317 hv = (hv << f_str->kshft) + hsq[aa0[i0]];
318 phv += ppst->pam2[ip][aa0[i0]][aa0[i0]] * ktup;
321 for (; i0 < n0; i0++) {
322 if (hsq[aa0[i0]] >= NMAP) {
325 /* restart hv, phv calculation */
326 for (; (i0 < lkt || hsq[aa0[i0]]>=NMAP) && i0<n0; i0++) {
327 if (hsq[aa0[i0]] >= NMAP) {hv=phv=0; lkt = i0+ktup; continue;}
328 hv = (hv << f_str->kshft) + hsq[aa0[i0]];
329 phv += ppst->pam2[ip][aa0[i0]][aa0[i0]] * ktup;
333 hv = ((hv & f_str->hmask) << f_str->kshft) + hsq[aa0[i0]];
334 f_str->link[i0] = f_str->harr[hv];
335 f_str->harr[hv] = i0;
337 f_str->pamh2[hv] = (phv += ppst->pam2[ip][aa0[i0]][aa0[i0]] * ktup);
338 /* this check should always be true, but just in case */
339 if (hsq[aa0[i0-kt1]]<NMAP)
340 phv -= ppst->pam2[ip][aa0[i0 - kt1]][aa0[i0 - kt1]] * ktup;
342 else f_str->pamh2[hv] = fact * ktup;
346 /* done hashing, now switch aa0, aa0x back */
352 /* this has been modified from 0..<nsq to 1..<=nsq because the
353 pam2[0][0] is now undefined for consistency with blast
357 for (i0 = 1; i0 <= nsq; i0++)
358 f_str->pamh1[i0] = ppst->pam2[ip][i0][i0] * ktup;
360 for (i0 = 1; i0 <= nsq; i0++)
361 f_str->pamh1[i0] = fact;
363 f_str->ndo = 0; /* used to save time on diagonals with long queries */
366 if ((f_str->diag = (struct dstruct *) calloc ((size_t)MAXDIAG,
367 sizeof (struct dstruct)))==NULL) {
368 fprintf (stderr," cannot allocate diagonal arrays: %ld\n",
369 (long) MAXDIAG *sizeof (struct dstruct));
373 if ((f_str->diag = (struct dstruct *) calloc ((size_t)n0,
374 sizeof (struct dstruct)))==NULL) {
375 fprintf (stderr," cannot allocate diagonal arrays: %ld\n",
376 (long)n0*sizeof (struct dstruct));
382 if ((waa= (int *)malloc (sizeof(int)*(nsq+1)*n0)) == NULL) {
383 fprintf(stderr,"cannot allocate waa struct %3d\n",nsq*n0);
388 for (i=0; i<=nsq; i++) {
389 for (j=0;j<n0; j++) {
390 *pwaa = ppst->pam2[ip][i][aa0[j]];
396 if ((waa= (int *)malloc (sizeof(int)*(nsq+1)*n0)) == NULL) {
397 fprintf(stderr,"cannot allocate waa struct %3d\n",nsq*n0);
402 for (i=0; i<=nsq; i++) {
403 for (j=0;j<n0; j++) {
404 *pwaa = ppst->pam2[0][i][aa0[j]];
411 maxn0 = max(2*n0,MIN_RES);
413 /* maxn0 needs to be large enough to accomodate introns
414 for TFASTX. For all other functions, it will be
416 maxn0 = max(4*n0,MIN_RES);
418 if ((res = (int *)calloc((size_t)maxn0,sizeof(int)))==NULL) {
419 fprintf(stderr,"cannot allocate alignment results array %d\n",maxn0);
423 f_str->max_res = maxn0;
429 /* pstring1 is a message to the manager, currently 512 */
430 /* pstring2 is the same information, but in a markx==10 format */
432 get_param (struct pstruct *pstr, char *pstring1, char *pstring2)
435 char *pg_str="FASTX";
437 char *pg_str="TFASTX";
440 if (!pstr->param_u.fa.optflag)
442 sprintf (pstring1, "%s (%s) function [%s matrix (%d:%d:%d)%s] ktup: %d\n join: %d, gap-pen: %d/%d, shift: %d width: %3d",pg_str,verstr,
444 sprintf (pstring1, "%s (%s) function [%s matrix (o=%d:%d:%d:%d)%s] ktup: %d\n join: %d, open/ext: %d/%d, shift: %d width: %3d",pg_str,verstr,
446 pstr->pamfile, pstr->pam_h,pstr->pam_l,pstr->pam_xx,pstr->pam_xm,
447 (pstr->ext_sq_set) ? "xS":"\0",
448 pstr->param_u.fa.ktup, pstr->param_u.fa.cgap,
449 pstr->gdelval, pstr->ggapval, pstr->gshift,
450 pstr->param_u.fa.optwid);
453 sprintf (pstring1, "%s (%s) function [optimized, %s matrix (%d:%d:%d)%s] ktup: %d\n join: %d, opt: %d, gap-pen: %d/%d shift: %3d, width: %3d",pg_str,verstr,
455 sprintf (pstring1, "%s (%s) function [optimized, %s matrix (o=%d:%d:%d:%d)%s] ktup: %d\n join: %d, opt: %d, open/ext: %d/%d shift: %3d, width: %3d",pg_str,verstr,
457 pstr->pamfile, pstr->pam_h,pstr->pam_l,pstr->pam_xx, pstr->pam_xm,
458 (pstr->ext_sq_set) ? "xS":"\0",
459 pstr->param_u.fa.ktup, pstr->param_u.fa.cgap,
460 pstr->param_u.fa.optcut, pstr->gdelval, pstr->ggapval,
461 pstr->gshift,pstr->param_u.fa.optwid);
463 if (pstr->param_u.fa.iniflag) strcat(pstring1," init1");
465 if (pstr->zsflag==0) strcat(pstring1," not-scaled");
466 else if (pstr->zsflag==1) strcat(pstring1," reg.-scaled");
469 if (pstring2 != NULL) {
471 sprintf (pstring2, "; pg_name: %s\n; pg_ver: %s\n; pg_matrix: %s (%d:%d)%s\n\
472 ; pg_gap-pen: %d %d\n; pg_ktup: %d\n; pg_optcut: %d\n; pg_cgap: %d\n",
474 sprintf (pstring2, "; pg_name: %s\n; pg_ver: %s\n; pg_matrix: %s (%d:%d)%s\n\
475 ; pg_open_ext: %d %d\n; pg_ktup: %d\n; pg_optcut: %d\n; pg_cgap: %d\n",
477 pg_str,verstr,pstr->pamfile, pstr->pam_h,pstr->pam_l,
478 (pstr->ext_sq_set) ? "xS":"\0", pstr->gdelval,
479 pstr->ggapval,pstr->param_u.fa.ktup,pstr->param_u.fa.optcut,
480 pstr->param_u.fa.cgap);
485 close_work (const unsigned char *aa0, int n0,
486 struct pstruct *ppst,
487 struct f_struct **f_arg)
489 struct f_struct *f_str;
519 void do_fastx (const unsigned char *aa0, int n0,
520 const unsigned char *aa1, int n1,
521 struct pstruct *ppst, struct f_struct *f_str,
522 struct rstruct *rst, int *hoff)
524 int nd; /* diagonal array size */
529 register struct dstruct *dptr;
533 register struct dstruct *diagp;
538 struct dstruct *dpmax;
541 struct savestr *vmptr;
544 int ktup, kt1, *hsq, ip, lkt;
548 n0x32 = n0x31+1+(n0-n0x31-1)/2;
550 const unsigned char *fs;
552 int n1x31, n1x32, last_n1, itemp;
554 n1x32 = n1x31+1+(n1-n1x31-1)/2;
557 if (ppst->ext_sq_set) {
566 ktup = ppst->param_u.fa.ktup;
570 rst->score[0] = rst->score[1] = rst->score[2] = 0;
574 if (n0+n1+1 >= MAXDIAG) {
575 fprintf(stderr,"n0,n1 too large: %d, %d\n",n0,n1);
576 rst->score[0] = rst->score[1] = rst->score[2] = -1;
580 f_str->noff = n0 - 1;
590 dpmax = &f_str->diag[nd];
591 for (dptr = &f_str->diag[f_str->ndo]; dptr < dpmax;)
598 for (vmptr = f_str->vmax; vmptr < &f_str->vmax[MAXSAV]; vmptr++)
600 f_str->lowmax = f_str->vmax;
606 for (lpos = 0; (lpos < lkt || hsq[aa1[lpos]]>=NMAP) && lpos<n1; lpos++) {
607 if (hsq[aa1[lpos]]>=NMAP) {
608 lhval = 0; lkt=lpos+ktup; continue;
609 #ifdef ALLOCN0 /* reinitialize dptr */
610 dptr = &f_str->diag[lpos % nd];
616 lhval = ((lhval & f_str->hmask) << f_str->kshft) + hsq[aa1[lpos]];
620 diagp = &f_str->diag[f_str->noff + lkt];
621 for (; lpos < n1; lpos++, diagp++) {
622 /* if (hsq[aa1[lpos]]>=NMAP) {lhval = 0; continue;} */
623 if (hsq[aa1[lpos]]>=NMAP) {
625 while (lpos < n1 && hsq[aa1[lpos]]>=NMAP) {lpos++; diagp++;}
626 if (lpos >= n1) break;
629 lhval = ((lhval & f_str->hmask) << f_str->kshft) + hsq[aa1[lpos]];
630 for (tpos = f_str->harr[lhval]; tpos >= 0; tpos = f_str->link[tpos]) {
631 if ((tscor = (dptr = &diagp[-tpos])->stop) >= 0) {
633 lposn0 = f_str->noff + lpos;
634 for (; lpos < n1; lpos++, lposn0++) {
635 if (hsq[aa1[lpos]]>=NMAP) {lhval = 0; goto loopl;}
636 lhval = ((lhval & f_str->hmask) << f_str->kshft) + hsq[aa1[lpos]];
637 for (tpos = f_str->harr[lhval]; tpos >= 0; tpos = f_str->link[tpos]) {
638 dpos = lposn0 - tpos;
639 if ((tscor = (dptr = &f_str->diag[dpos % nd])->stop) >= 0) {
642 if ((tscor -= lpos) <= 0) { /* better to start over */
644 if ((tscor += (kfact = f_str->pamh2[lhval])) < 0 && f_str->lowscor < scor)
646 savemax (dptr, dpos, f_str);
648 savemax (dptr, f_str);
650 if ((tscor += scor) >= kfact) {
656 dptr->start = (dptr->stop = lpos) - kt1;
658 } /* continue current run in diagonal */
660 dptr->score += f_str->pamh1[aa0[tpos]];
665 dptr->score = f_str->pamh2[lhval];
666 dptr->start = (dptr->stop = lpos) - kt1;
671 /* reinitialize diag structure */
673 if ((dptr = &f_str->diag[lpos % nd])->score > f_str->lowscor) {
674 savemax (dptr, lpos, f_str);
683 for (tpos = 0, dpos = f_str->noff + n1 - 1; tpos < n0; tpos++, dpos--) {
684 if ((dptr = &f_str->diag[dpos % nd])->score > f_str->lowscor)
685 savemax (dptr, dpos, f_str);
688 for (dptr = f_str->diag; dptr < dpmax;) {
689 if (dptr->score > f_str->lowscor) savemax (dptr, f_str);
698 at this point all of the elements of aa1[lpos]
699 have been searched for elements of aa0[tpos]
700 with the results in diag[dpos]
703 for (nsave = 0, vmptr = f_str->vmax; vmptr < &f_str->vmax[MAXSAV]; vmptr++)
706 fprintf(stderr,"0: %4d-%4d 1: %4d-%4d dp: %d score: %d\n",
707 f_str->noff+vmptr->start-vmptr->dp,
708 f_str->noff+vmptr->stop-vmptr->dp,
709 vmptr->start,vmptr->stop,
710 vmptr->dp,vmptr->score);
712 if (vmptr->score > 0) {
713 vmptr->score = spam (aa0, aa1, vmptr, ppst->pam2[ip], f_str);
714 f_str->vptr[nsave++] = vmptr;
719 rst->score[0] = rst->score[1] = rst->score[2] = 0;
724 /* FASTX code here to modify the start, stop points for
725 the three phases of the translated protein sequence
728 fprintf(stderr,"n0x: %d; n0x31:%d; n0x32: %d\n",n0,n0x31,n0x32);
729 for (ib=0; ib<nsave; ib++) {
730 fprintf(stderr,"0: %4d-%4d 1: %4d-%4d dp: %d score: %d\n",
731 f_str->noff+f_str->vptr[ib]->start-f_str->vptr[ib]->dp,
732 f_str->noff+f_str->vptr[ib]->stop-f_str->vptr[ib]->dp,
733 f_str->vptr[ib]->start,f_str->vptr[ib]->stop,
734 f_str->vptr[ib]->dp,f_str->vptr[ib]->score);
737 fprintf(stderr,"---\n");
739 for (ib=0; ib<nsave; ib++) {
740 if (f_str->noff-f_str->vptr[ib]->dp+f_str->vptr[ib]->start >= n0x32)
741 f_str->vptr[ib]->dp += n0x32;
742 if (f_str->noff-f_str->vptr[ib]->dp +f_str->vptr[ib]->start >= n0x31)
743 f_str->vptr[ib]->dp += n0x31;
747 for (ib=0; ib<nsave; ib++) {
748 fprintf(stderr,"0: %4d-%4d 1: %4d-%4d dp: %d score: %d\n",
749 f_str->noff+f_str->vptr[ib]->start-f_str->vptr[ib]->dp,
750 f_str->noff+f_str->vptr[ib]->stop-f_str->vptr[ib]->dp,
751 f_str->vptr[ib]->start,f_str->vptr[ib]->stop,
752 f_str->vptr[ib]->dp,f_str->vptr[ib]->score);
757 /* TFASTX code here to modify the start, stop points for
758 the three phases of the translated protein sequence
759 TFASTX modifies library start points, rather than
764 fprintf(stderr,"n0: %d; noff: %d; n1: %d; n1x31: %d n1x32 %d\n",n0, f_str->noff,n1,n1x31,n1x32);
765 for (ib=0; ib<nsave; ib++) {
766 fprintf(stderr,"0: %4d-%4d 1: %4d-%4d dp: %d score: %d\n",
767 f_str->noff+f_str->vptr[ib]->start-f_str->vptr[ib]->dp,
768 f_str->noff+f_str->vptr[ib]->stop-f_str->vptr[ib]->dp,
769 f_str->vptr[ib]->start,f_str->vptr[ib]->stop,
770 f_str->vptr[ib]->dp,f_str->vptr[ib]->score);
773 fprintf(stderr,"---\n");
776 for (ib=0; ib<nsave; ib++) {
777 if (f_str->vptr[ib]->start >= n1x32) {
778 f_str->vptr[ib]->start -= n1x32;
779 f_str->vptr[ib]->stop -= n1x32;
780 f_str->vptr[ib]->dp -= n1x32;
782 if (f_str->vptr[ib]->start >= n1x31) {
783 f_str->vptr[ib]->start -= n1x31;
784 f_str->vptr[ib]->stop -= n1x31;
785 f_str->vptr[ib]->dp -= n1x31;
790 for (ib=0; ib<nsave; ib++) {
791 fprintf(stderr,"0: %4d-%4d 1: %4d-%4d dp: %d score: %d\n",
792 f_str->noff+f_str->vptr[ib]->start-f_str->vptr[ib]->dp,
793 f_str->noff+f_str->vptr[ib]->stop-f_str->vptr[ib]->dp,
794 f_str->vptr[ib]->start,f_str->vptr[ib]->stop,
795 f_str->vptr[ib]->dp,f_str->vptr[ib]->score);
801 scor = sconn (f_str->vptr, nsave, ppst->param_u.fa.cgap,
802 ppst->param_u.fa.pgap, f_str);
804 for (vmptr=f_str->vptr[0],ib=1; ib<nsave; ib++)
805 if (f_str->vptr[ib]->score > vmptr->score) vmptr=f_str->vptr[ib];
807 /* kssort (f_str->vptr, nsave); */
809 rst->score[1] = vmptr->score; /* best single score - init1*/
810 rst->score[0] = max (scor, vmptr->score); /* initn */
811 rst->score[2] = rst->score[0]; /* initn */
813 my_hoff=f_str->noff - vmptr->dp;
817 fprintf(stderr," Long n1: %d\n",n1);
821 if (ppst->param_u.fa.optflag) {
822 if (rst->score[0] > ppst->param_u.fa.optcut) {
824 rst->score[2] = dmatchx(aa0, n0,aa1,n1,my_hoff,
825 ppst->param_u.fa.optwid, ppst->pam2[ip],
826 ppst->gdelval,ppst->ggapval,ppst->gshift,f_str);
828 /* generate f_str->aa1y */
830 for (i=0; i<n1; i++) {
831 fputc(ppst->sq[aa1[i]],stderr);
832 if (i%60==59) fputc('\n',stderr);
834 fprintf(stderr,"\n-----\n");
836 for (fs=aa1,itemp=0; itemp <3; itemp++,fs++) {
837 for (fd= &f_str->aa1y[itemp]; *fs!=EOSEQ; fd += 3, fs++) *fd = *fs;
842 for (i=0; i<n1; i++) {
843 fputc(ppst->sq[f_str->aa1y[i]],stderr);
844 if (i%60==59) fputc('\n',stderr);
847 rst->score[2] = dmatchx(aa0, n0, aa1, n1, my_hoff=vmptr->dp-f_str->noff,
848 ppst->param_u.fa.optwid, ppst->pam2[ip],
849 ppst->gdelval,ppst->ggapval,ppst->gshift,f_str);
856 /* returns rst.score[0] - initn
861 void do_work (const unsigned char *aa0, int n0,
862 const unsigned char *aa1, int n1,
864 struct pstruct *ppst, struct f_struct *f_str,
865 int qr_flg, struct rstruct *rst)
868 int last_n1, itx, itt, n10, i;
872 /* aa0 has a protein sequence */
873 /* aa1 has a raw DNA sequence */
878 for (itx= itt*3; itx< itt*3+3; itx++) {
879 n10 = saatran(aa1,&aa1x[last_n1],n1,itx);
881 fprintf(stderr," itt %d itx: %d\n",itt,itx);
882 for (i=0; i<n10; i++) {
883 fprintf(stderr,"%c",aa[f_str->aa1x[last_n1+i]]);
884 if ((i%60)==59) fprintf(stderr,"\n");
886 fprintf(stderr,"\n");
893 rst->score[0] = rst->score[1] = rst->score[2] = 0;
895 rst->segnum = rst->seglen = 1;
898 do_fastx (f_str->aa0x, n0, aa1, n1, ppst, f_str, rst, &hoff);
900 do_fastx (aa0, n0, f_str->aa1x, n10, ppst, f_str, rst, &hoff);
904 void do_opt (const unsigned char *aa0, int n0,
905 const unsigned char *aa1, int n1,
907 struct pstruct *ppst,
908 struct f_struct *f_str,
911 int optflag, tscore, hoff;
913 optflag = ppst->param_u.fa.optflag;
914 ppst->param_u.fa.optflag = 1;
917 do_fastx (f_str->aa0x, n0, aa1, n1, ppst, f_str, rst, &hoff);
919 do_fastx (aa0, n0, aa1, n1, ppst, f_str, rst, &hoff);
922 ppst->param_u.fa.optflag = optflag;
927 savemax (dptr, dpos, f_str)
928 register struct dstruct *dptr;
930 struct f_struct *f_str;
932 register struct savestr *vmptr;
937 savemax (dptr, f_str)
938 register struct dstruct *dptr;
939 struct f_struct *f_str;
942 register struct savestr *vmptr;
945 dpos = (int) (dptr - f_str->diag);
949 /* check to see if this is the continuation of a run that is already saved */
951 if ((vmptr = dptr->dmax) != NULL && vmptr->dp == dpos &&
952 vmptr->start == dptr->start)
954 vmptr->stop = dptr->stop;
955 if ((i = dptr->score) <= vmptr->score)
958 if (vmptr != f_str->lowmax)
963 i = f_str->lowmax->score = dptr->score;
964 f_str->lowmax->dp = dpos;
965 f_str->lowmax->start = dptr->start;
966 f_str->lowmax->stop = dptr->stop;
967 dptr->dmax = f_str->lowmax;
970 for (vmptr = f_str->vmax; vmptr < &f_str->vmax[MAXSAV]; vmptr++)
971 if (vmptr->score < i)
974 f_str->lowmax = vmptr;
979 int spam (const unsigned char *aa0, const unsigned char *aa1,
980 struct savestr *dmax, int **pam2,
981 struct f_struct *f_str)
986 int start, stop, score;
988 const unsigned char *aa0p, *aa1p;
990 aa1p = &aa1[lpos = dmax->start];
991 aa0p = &aa0[lpos - dmax->dp + f_str->noff];
994 tot = curv.score = maxv.score = 0;
995 for (; lpos <= dmax->stop; lpos++) {
996 tot += pam2[*aa0p++][*aa1p++];
997 if (tot > curv.score) {
1002 if (curv.score > maxv.score) {
1003 maxv.start = curv.start;
1004 maxv.stop = curv.stop;
1005 maxv.score = curv.score;
1007 tot = curv.score = 0;
1008 curv.start = lpos+1;
1012 if (curv.score > maxv.score) {
1013 maxv.start = curv.start;
1014 maxv.stop = curv.stop;
1015 maxv.score = curv.score;
1018 /* if (maxv.start != dmax->start || maxv.stop != dmax->stop)
1019 printf(" new region: %3d %3d %3d %3d\n",maxv.start,
1020 dmax->start,maxv.stop,dmax->stop);
1022 dmax->start = maxv.start;
1023 dmax->stop = maxv.stop;
1030 int sconn (struct savestr **v, int n,
1031 int cgap, int pgap, struct f_struct *f_str)
1038 } *start, *sl, *sj, *so, sarr[MAXSAV];
1039 int lstart, tstart, plstop, ptstop;
1041 /* sort the score left to right in lib pos */
1047 /* for the remaining runs, see if they fit */
1049 for (i = 0, si = 0; i < n; i++)
1052 /* if the score is less than the gap penalty, it never helps */
1053 if (v[i]->score < cgap)
1055 lstart = v[i]->start;
1056 tstart = lstart - v[i]->dp + f_str->noff;
1058 /* put the run in the group */
1060 sarr[si].score = v[i]->score;
1061 sarr[si].next = NULL;
1063 /* if it fits, then increase the score */
1064 for (sl = start; sl != NULL; sl = sl->next)
1066 plstop = sl->vp->stop;
1067 ptstop = plstop - sl->vp->dp + f_str->noff;
1068 if (plstop < lstart+XFACT && ptstop < tstart+XFACT) {
1069 sarr[si].score = sl->score + v[i]->score + pgap;
1074 /* now recalculate where the score fits */
1078 for (sj = start, so = NULL; sj != NULL; sj = sj->next)
1080 if (sarr[si].score > sj->score)
1084 so->next = &sarr[si];
1095 return (start->score);
1102 struct savestr *v[];
1106 struct savestr *tmp;
1108 for (gap = n / 2; gap > 0; gap /= 2)
1109 for (i = gap; i < n; i++)
1110 for (j = i - gap; j >= 0; j -= gap)
1112 if (v[j]->score >= v[j + gap]->score)
1122 struct savestr *v[];
1126 struct savestr *tmp;
1128 for (gap = n / 2; gap > 0; gap /= 2)
1129 for (i = gap; i < n; i++)
1130 for (j = i - gap; j >= 0; j -= gap)
1132 if (v[j]->start <= v[j + gap]->start)
1141 dmatchx(const unsigned char *aa0, int n0,
1142 const unsigned char *aa1, int n1,
1143 int hoff, int window,
1144 int **pam2, int gdelval, int ggapval, int gshift,
1145 struct f_struct *f_str)
1151 return lx_band(aa1,n1,f_str->aa0y,n0,
1153 #ifdef OLD_FASTA_GAP
1161 return lx_band(aa0,n0,f_str->aa1y,n1,
1163 #ifdef OLD_FASTA_GAP
1174 init_row(struct sx_s *row, int sp) {
1176 for (i = 0; i < sp; i++) {
1177 row[i].C1 = row[i].I1 = 0;
1178 row[i].C2 = row[i].I2 = 0;
1179 row[i].C3 = row[i].I3 = 0;
1185 lx_band(const unsigned char *prot_seq, /* array with protein sequence numbers*/
1186 int len_prot, /* length of prot. seq */
1187 const unsigned char *dna_prot_seq, /* translated DNA sequence numbers*/
1188 int len_dna_prot, /* length trans. seq. */
1189 int **pam_matrix, /* scoring matrix */
1190 int gopen, int gext, /* gap open, gap extend penalties */
1191 int gshift, /* frame-shift penalty */
1192 int start_diag, /* start diagonal of band */
1193 int width, /* width for band alignment */
1194 struct f_struct *f_str)
1197 int i, j, bd, bd1, x1, sp, p1=0, p2=0, end_prot;
1198 int sc, del, best = 0, cd,ci, e1, e2, e3, cd1, cd2, cd3, f, gg;
1200 const unsigned char *dp;
1201 register struct sx_s *ap, *aq;
1206 if (f_str->cur == NULL)
1207 f_str->cur = (struct sx_s *) ckalloc(sizeof(struct sx_s)*sp);
1209 init_row(f_str->cur, sp);
1212 if (start_diag %3 !=0) start_diag = start_diag/3-1;
1213 else start_diag = start_diag/3;
1217 if (width % 3 != 0) width = width/3+1;
1218 else width = width /3;
1221 /* currently, this code assumes that the DNA sequence is longer than the
1222 protein sequence. This is not always true. len_prot in the loop below
1223 should be decreased to the projection of the DNA on the protein */
1225 x1 = start_diag; /* x1 = lower bound of DNA */
1228 end_prot = max(0,-width-start_diag) + (len_dna_prot+5)/3 + width;
1229 end_prot = min(end_prot,len_prot);
1231 /* i counts through protein sequence, x1 through DNAp */
1233 for (i = max(0, -width-start_diag), x1+=i; i < end_prot; i++, x1++) {
1234 bd = min(x1+width, len_dna_prot/3); /* upper bound of band */
1235 bd1 = max(0,x1); /* lower bound of band */
1236 wt = pam_matrix[prot_seq[i]];
1237 del = 1-x1; /*adjustment*/
1241 ap = &f_str->cur[bd1];
1243 e1 = f_str->cur[bd1-1].C3;
1247 for (dp = &dna_prot_seq[(bd1-del)*3]; ap < &f_str->cur[bd]; ap++) {
1248 sc = max(max(e1, (e3=ap->C2))-gshift, e2)+wt[*dp++];
1249 if (cd1 > sc) sc = cd1;
1251 if ((ci = aq->I1) > 0) {
1252 if (sc < ci) { ap->C1 = ci; ap->I1 = ci-gext;}
1257 if (sc > best) best =sc;
1258 if (cd1 < sc) cd1 = sc;
1259 ap->I1 = max(ci-gext, sc);
1260 } else ap->I1 = ci-gext;
1264 ap->I1 = ap->C1 = 0;
1266 ap->C1 = sc; sc-=gg;
1268 if (sc > best) best =sc;
1269 if (cd1 < sc) cd1 = sc;
1274 sc = max(max(e2, (e1=ap->C3))-gshift, e3)+wt[*dp++];
1275 if (cd2 > sc) sc = cd2;
1277 if ((ci = aq->I2) > 0) {
1278 if (sc < ci) { ap->C2 = ci; ap->I2 = ci-gext;}
1283 if (sc > best) best =sc;
1284 if (cd2 < sc) cd2 = sc;
1285 ap->I2 = max(ci-gext, sc);
1290 ap->I2 = ap->C2 = 0;
1292 ap->C2 = sc; sc-=gg;
1294 if (sc > best) best =sc;
1295 if (cd2 < sc) cd2 = sc;
1300 sc = max(max(e3, (e2=aq->C1))-gshift, e1)+wt[*dp++];
1301 if (cd3 > sc) sc = cd3;
1303 if ((ci = aq++->I3) > 0) {
1304 if (sc < ci) { ap->C3 = ci; ap->I3 = ci-gext;}
1309 if (sc > best) best =sc;
1310 if (cd3 < sc) cd3 = sc;
1311 ap->I3 = max(ci-gext, sc);
1316 ap->I3 = ap->C3 = 0;
1318 ap->C3 = sc; sc-=gg;
1320 if (sc > best) best =sc;
1321 if (cd3 < sc) cd3 = sc;
1328 /* printf("The best score is %d\n", best); */
1329 return best+gopen+gext;
1332 /* ckalloc - allocate space; check for success */
1333 void *ckalloc(size_t amount)
1337 if ((p = (void *)malloc( (size_t)amount)) == NULL)
1338 w_abort("Ran out of memory.","");
1342 /* calculate the 100% identical score */
1344 shscore(unsigned char *aa0, int n0, int **pam2)
1347 for (i=0,sum=0; i<n0; i++)
1348 sum += pam2[aa0[i]][aa0[i]];
1356 /* code above is to convert sequence into numbers */
1358 typedef struct mat *match_ptr;
1360 typedef struct mat {
1369 typedef state *state_ptr;
1371 typedef struct st_s { int C, I, D;} *st_ptr;
1373 /* static st_ptr up=NULL, down, tp; */
1374 /* static int *st_up; */
1375 /* static int gop, gext, shift; */
1377 void *ckalloc(size_t);
1378 static match_ptr small_global(), global();
1379 static int local_align(), find_best();
1380 static void init_row2(), init_ROW();
1383 pro_dna(const unsigned char *prot_seq, /* array with prot. seq. numbers*/
1384 int len_prot, /* length of prot. seq */
1385 const unsigned char *dna_prot_seq, /* trans. DNA seq. numbers*/
1386 int len_dna_prot, /* length trans. seq. */
1387 int **pam_matrix, /* scoring matrix */
1388 int gopen, int gex, /* gap open, gap extend penalties */
1389 int gshift, /* frame-shift penalty */
1391 struct a_res_str *a_res) /* alignment info */
1393 match_ptr align, ap, aq;
1394 int x, y, ex, ey, i, score;
1396 st_ptr up, down, tp;
1398 /* these globals removed */
1399 /* gext = gex; gop = gopen; shift = gshift; */
1401 /* for fastx (but not tfastx), these could be moved into init_work(),
1402 and done only once */
1404 up = (st_ptr) ckalloc(sizeof(struct st_s)*(len_dna_prot+10));
1405 down = (st_ptr) ckalloc(sizeof(struct st_s)*(len_dna_prot+10));
1406 tp = (st_ptr) ckalloc(sizeof(struct st_s)*(len_dna_prot+10));
1408 /*local alignment find the best local alignment x (prot) and y (DNA)
1409 is the starting position of the best local alignment
1410 and ex (prot) ey (DNA) is the ending position */
1411 score= local_align(&x, &y, &ex, &ey, pam_matrix,
1413 dna_prot_seq, len_dna_prot,
1414 prot_seq, len_prot, up, down);
1416 /* this is very strange, since local_align initialized up, down */
1417 up += 3; down += 3; tp += 3;
1419 /* x, y - start in prot, dna_prot */
1420 a_res->min0 = x; /* prot */
1421 a_res->max0 = ex; /* prot */
1423 a_res->min1 = y; /* DNA-prot */
1424 a_res->max1 = ey; /* DNA-prot */
1426 align = global(x, y, ex, ey, pam_matrix, gopen, gex, gshift,
1427 dna_prot_seq, prot_seq, 0, 0, &up, &down, &tp);
1429 alignment = a_res->res;
1431 /* from earlier version */
1432 /* alignment[0] = x; */ /* start of alignment in prot */
1433 /* alignment[1] = y; */ /* start of alignment in DNA */
1435 for (ap = align, i= 0; ap; i++) {
1436 if (i < max_res) {alignment[i] = ap->l;}
1437 aq = ap->next; free(ap); ap = aq;
1441 fprintf(stderr," alignment truncated: %d/%d\n", max_res,i);
1444 up = &up[-3]; down = &down[-3]; tp = &tp[-3];
1445 free(up); free(tp); free(down);
1446 /* free(st_up); */ /* moved into local align */
1448 a_res->nres = i; /* i has the length of the alignment */
1453 swap(void **a, void **b) {
1462 local alignment find the best local alignment x and y
1463 is the starting position of the best local alignment
1464 and ex ey is the ending position
1467 local_align(int *x, int *y, int *ex, int *ey,
1468 int **wgts, int gop, int gext, int shift,
1469 unsigned char *dnap, int ld,
1470 unsigned char *pro, int lp,
1471 st_ptr up, st_ptr down) {
1473 int i, j, score, x1,x2,x3,x4, e1, e2 = 0, e3,
1474 sc, del, e, best = 0, *wt, cd, ci;
1475 state_ptr cur_st, last_st, cur_i_st;
1478 int *st_up, *cur_d_st;
1481 Array rowiC store the best scores of alignment ending at a position
1482 Arrays rowiD, and rowiI store the best scores of alignment ending
1483 at a position with a deletion or insrtion
1484 Arrays sti stores the starting position of the best alignment whose
1485 score stored in the corresponding row array.
1486 The program stores two rows to complete the computation, same is
1487 for the global alignment routine.
1490 /* for fastx (but not tfastx), this could be moved into init_work(),
1491 and done only once */
1492 st_up = (int *) ckalloc(sizeof(int)*(ld+10));
1493 init_row2(st_up, ld+5);
1496 init_ROW(up, ld+1); /* set to zero */
1497 init_ROW(down, ld+1); /* set to zero */
1503 /* for fastx (but not tfastx), these could be moved into init_work(),
1504 and done only once */
1505 cur_st = (state_ptr) ckalloc(sizeof(state)*(ld+1));
1506 last_st = (state_ptr) ckalloc(sizeof(state)*(ld+1));
1507 cur_i_st = (state_ptr) ckalloc(sizeof(state)*(ld+1));
1512 for (i = 0; i < lp; i++) {
1513 wt = &wgts[pro[i]][0];
1514 for (j = 0; j < 2; j++) {
1518 for (j = 2; j < ld; j++) {
1523 e3 = e2-shift; e2 = last[j-3].C;
1524 e1 = last[j-2].C-shift;
1525 if (e1 > sc) {sc = e1; del = 2;}
1526 if (e2 > sc) {sc = e2; del = 3;}
1527 if (e3 > sc) {sc = e3; del = 4;}
1530 if (sc < -score) sc=-score;
1534 if (sc < (ci=last[j].I)) {
1537 if (sc < (cd=cur[j].D)) {
1543 cur[j+3].D = e-gext;
1546 cur[j+3].D = cd-gext;
1547 cur_d_st[j+3] = cur_d_st[j]+3;
1552 cur_st[j].i = cur_st[j-e1].i;
1553 cur_st[j].j = cur_st[j-e1].j;
1556 cur_st[j].i = cur_i_st[j].i;
1557 cur_st[j].j = cur_i_st[j].j;
1564 cur_st[j].i = last_st[j-del].i;
1565 cur_st[j].j = last_st[j-del].j;
1572 cur_st[j].j = max(0, j-del+1);
1582 cur_i_st[j].i = cur_st[j].i;
1583 cur_i_st[j].j = cur_st[j].j;
1585 cur[j].I = ci- gext;
1595 swap((void **)&last, (void **)&cur);
1596 swap((void **)&cur_st, (void **)&last_st);
1598 /* printf("The best score is %d\n", best); */
1599 *x = x1; *y = x2; *ex = x3; *ey = x4;
1600 free(cur_st); free(last_st); free(cur_i_st);
1606 Both global_up and global_down do linear space score only global
1607 alignments on subsequence pro[x]...pro[ex], and dna[y]...dna[ey].
1608 global_up do the algorithm upwards, from row x towards row y.
1609 global_down do the algorithm downwards, from row y towards x.
1613 global_up(st_ptr *row1, st_ptr *row2,
1614 int x, int y, int ex, int ey,
1615 int **wgts, int gop, int gext, int shift,
1616 unsigned char *dnap,
1619 int i, j, k, sc, e, e1, e2, e3, t, ci, cd, score, *wt;
1622 cur = *row1; last = *row2;
1624 for (j = 1; j <= ey-y+1; j++) {
1625 if (j % 3 == 0) {last[j].C = sc; sc -= gext; last[j].I = sc-gop;}
1626 else { last[j].I = last[j].C = -10000;}
1629 last[0].C = 0; cur[0].D = cur[1].D = cur[2].D = -10000;
1630 last[0].D = last[1].D = last[2].D = -10000;
1631 if (N) last[0].I = -gext; else last[0].I = -gop-gext;
1632 for (i = 1; i <= ex-x+1; i++) {
1633 wt = &wgts[pro[i+x-1]][0]; e2 = last[0].C; e1 = -10000;
1634 for (j = 0; j <= ey-y+1; j++) {
1637 if (t < 3) score = -10000;
1638 else score = wt[dnap[t-3]];
1640 if (j == 3) sc = e2;
1641 else if (j == 2) sc = e2-shift;
1645 sc = max(max(e1, e3)-shift, e2);
1648 sc = max(sc, max(ci=last[j].I, cd = cur[j].D));
1650 cur[j+3].D = max(cd, sc-gop)-gext;
1651 cur[j].I = max(ci, sc-gop)-gext;
1653 swap((void **)&last, (void **)&cur);
1655 for (i = 0; i <= ey-y+1; i++) last[i].I = cur[i].I;
1656 if (*row1 != last) swap((void **)row1, (void **)row2);
1660 global_down(st_ptr *row1, st_ptr *row2,
1661 int x, int y, int ex, int ey,
1662 int **wgts, int gop, int gext, int shift,
1663 unsigned char *dnap, unsigned char *pro,
1665 int i, j, k, sc, del, *tmp, e, t, e1,e2,e3, ci,cd, s1, s2, s3, *wt;
1668 cur = (*row1); last = *row2;
1670 for (j = ey-y; j >= 0; j--) {
1671 if ((ey-y+1-j) % 3) {last[j].C = sc; sc-=gext; last[j].I = sc-gop;}
1672 else last[j].I = last[j].C = -10000;
1675 cur[ey-y+1].D = cur[ey-y].D = cur[ey-y-1].D = -10000;
1676 last[ey-y+1].D = last[ey-y].D = last[ey-y-1].D = -10000;
1677 if (N) last[ey-y+1].I = -gext; else last[ey-y+1].I = -gop-gext;
1678 for (i = ex-x; i >= 0; i--) {
1679 wt = &wgts[pro[i+x]][0]; e2 = last[ey-y+1].C;
1680 e1 = s2 = s3 = -10000;
1681 for (j = ey-y+1; j >= 0; j--) {
1686 if (t+2==ey) sc = e2+s2;
1687 else if (t+1==ey) sc = e2-shift+s1;
1691 sc = max(max(e1+s1, e3+s3)-shift, e2+s2);
1693 if (sc < (cd= cur[j].D)) {
1695 cur[j-3].D = cd-gext;
1696 } else cur[j-3].D =max(cd, sc-gop)-gext;
1697 if (sc < (ci= last[j].I)) {
1699 cur[j].I = ci - gext;
1700 } else cur[j].I = max(sc-gop,ci)-gext;
1704 swap((void **)&last, (void **)&cur);
1706 for (i = 0; i <= ey-y+1; i++) last[i].I = cur[i].I;
1707 if (*row1 != last) swap((void **)row1, (void **)row2);
1711 init_row2(int *row, int ld) {
1713 for (i = 0; i < ld; i++) row[i] = 0;
1717 init_ROW(st_ptr row, int ld) {
1719 for (i = 0; i < ld; i++) row[i].I = row[i].D = row[i].C = 0;
1723 combine(match_ptr x1, match_ptr x2, int st) {
1726 if (x1 == NULL) return x2;
1727 for (x = x1; x->next; x = x->next);
1730 for (x = x2; x; x = x->next) {
1732 if (x->l == 3 || x->l == 4) break;
1740 global use the two upwards and downwards score only linear
1741 space global alignment subroutine to recursively build the
1746 global(int x, int y, int ex, int ey,
1747 int **wgts, int gop, int gext, int shift,
1748 unsigned char *dnap,
1751 st_ptr *up_stp, st_ptr *dn_stp, st_ptr *tp_stp
1756 match_ptr x1, x2, mm1, mm2;
1757 /*printf("%d %d %d %d\n", x,y, ex, ey);*/
1759 if the space required is limited, we can do a quadratic space
1760 algorithm to find the alignment.
1763 mm1 = NULL; mm2= NULL;
1764 for (m = y+3; m <= ey; m+=3) {
1765 x1 = (match_ptr) ckalloc(sizeof(match_node));
1766 x1->l = 5; x1->next = mm1;
1767 if (mm1== NULL) mm2 = x1;
1771 if ((ey-y) % 3 != 0) {
1772 x1 = (match_ptr) ckalloc(sizeof(match_node));
1773 x1->l = ((ey-y) % 3) +1; x1->next = NULL;
1774 if (mm2) mm2->next = x1;
1777 if (mm2) mm2->l = 4;
1784 for (m = x; m <= ex; m++) {
1785 x1 = (match_ptr) ckalloc(sizeof(match_node));
1786 x1->l = 0; x1->next = mm1; mm1 = x1;
1790 if (ex -x < SGW1-1 && ey-y < SGW2-1)
1791 return small_global(x,y,ex,ey,
1792 wgts, gop, gext, shift,
1796 Do the score only global alignment from row x to row m, m is
1797 the middle row of x and ex. Store the information of row m in
1800 global_up(up_stp, tp_stp, x, y, m, ey,
1801 wgts, gop, gext, shift,
1805 Do the score only global alignment downwards from row ex
1806 to row m+1, store information of row m+1 in downC downI and downD
1808 global_down(dn_stp, tp_stp, m+1, y, ex, ey,
1809 wgts, gop, gext, shift,
1813 Use these information of row m and m+1, to find the crossing
1814 point of the best alignment with the middle row. The crossing
1815 point is given by m1 and m2. Then we recursively call global
1816 itself to compute alignments in two smaller regions found by
1817 the crossing point and combine the two alignments to form a
1818 whole alignment. Return that alignment.
1820 if (find_best(*up_stp, *dn_stp, &m1, &m2, ey-y+1, y, gop)) {
1821 x1 = global(x, y, m, m1, wgts, gop, gext, shift, dnap, pro, N1, 0,
1822 up_stp, dn_stp, tp_stp);
1823 x2 = global(m+1, m2, ex, ey, wgts, gop, gext, shift, dnap, pro, 0, N2,
1824 up_stp, dn_stp, tp_stp);
1825 if (m1 == m2) x1 = combine(x1,x2,1);
1826 else x1 = combine(x1, x2,0);
1828 x1 = global(x, y, m-1, m1, wgts, gop, gext, shift, dnap, pro, N1, 1,
1829 up_stp, dn_stp, tp_stp);
1830 x2 = global(m+2, m2, ex, ey, wgts, gop, gext, shift, dnap, pro, 1, N2,
1831 up_stp, dn_stp, tp_stp);
1832 mm1 = (match_ptr) ckalloc(sizeof(match_node));
1833 mm1->i = m; mm1->l = 0; mm1->j = m1;
1834 mm2 = (match_ptr) ckalloc(sizeof(match_node));
1835 mm2->i = m+1; mm2->l = 0; mm2->j = m1;
1836 mm1->next = mm2; mm2->next = x2;
1837 x1 = combine(x1, mm1, 0);
1843 find_best(st_ptr up, st_ptr down,
1845 int ld, int y, int gop) {
1846 int i, best = -100000, j = 0, s1, s2, s3, s4, st;
1848 for (i = 1; i < ld; i++) {
1849 s2 = up[i-1].C + down[i].C;
1850 s4 = up[i-1].I + down[i].I + gop;
1852 best = s2; j = i; st = 1;
1855 best = s4; j = i; st = 0;
1860 /*printf("find best score =%d\n", best);*/
1865 An alignment is represented as a linked list whose element
1866 is of type match_node. Each element represent an edge in the
1867 path of the alignment graph. The fields of match_node are
1868 l --- gives the type of the edge.
1869 i, j --- give the end position.
1873 small_global(int x, int y, int ex, int ey,
1874 int **wgts, int gop, int gext, int shift,
1875 unsigned char *dnap, unsigned char *pro,
1877 static int C[SGW1+1][SGW2+1], st[SGW1+1][SGW2+1], D[SGW2+7], I[SGW2+1];
1878 int i, j, e, sc, score, del, k, t, *wt, ci, cd;
1879 int *cI, *cD, *cC, *lC, *cst, e2, e3, e4;
1880 match_ptr mp, first;
1882 /*printf("small_global %d %d %d %d\n", x, y, ex, ey);*/
1883 sc = -gop-gext; C[0][0] = 0;
1884 if (N1) I[0] = -gext; else I[0] = sc;
1885 for (j = 1; j <= ey-y+1; j++) {
1887 C[0][j] = sc; sc -= gext; I[j] = sc-gop;
1888 } else I[j] = C[0][j] = -10000;
1891 lC = &C[0][0]; cD = D; D[0] = D[1] = D[2] = -10000;
1893 for (i = 1; i <= ex-x+1; i++) {
1895 wt = &wgts[pro[i+x-1]][0]; cst = &st[i][0];
1896 for (j = 0; j <=ey-y+1; j++) {
1897 sc = -10000; del = 0;
1901 if (t < 3) score = -10000;
1902 else score = wt[dnap[t-3]];
1904 e2 = lC[j-2]-shift; sc = lC[j-3]; e4 = lC[j-4]-shift;
1906 if (e2 > sc) { sc = e2; del = 2;}
1907 if (e4 >= sc) { sc = e4; del = 4;}
1909 if (j ==3) {sc= lC[0]; del = 3;}
1910 else if (j == 2) {sc = lC[0]-shift; del = 2;}
1924 cD[j+3] = cd - gext;
1925 } else cD[j+3] = sc -gext;
1929 } else cI[j] = sc-gext;
1934 if (N2 && ci +gop > cC[ey-y+1]) {
1935 st[ex-x+1][ey-y+1] = 0;
1936 /*printf("small score = %d\n", ci+gop);*/
1937 } /*else printf("small score =%d\n", cC[ey-y+1]);*/
1938 first = NULL; e = 1;
1939 for (i = ex+1, j = ey+1; i > x || j > y; i--) {
1940 mp = (match_ptr) ckalloc(sizeof(match_node));
1942 k = (t=st[i-x][j-y])%10;
1944 if (e == 5 && (t/10)%2 == 1) k = 5;
1945 if (e == 0 && (t/20)== 1) k = 0;
1946 if (k == 5) { j -= 3; i++; e=5;}
1947 else {j -= k;if (k==0) e= 0; else e = 1;}
1953 /* for (i = 0; i <= ex-x; i++) {
1954 for (j = 0; j <= ey-y; j++)
1955 printf("%d ", C[i][j]);
1966 extern void display_alig(a, dna, pro,length, ld)
1968 unsigned char *dna, *pro;
1971 int len = 0, i, j, x, y, lines, k;
1972 static char line1[100], line2[100], line3[100],
1974 unsigned char *dna1, c1, c2, c3, *st;
1976 dna1 = ckalloc((size_t)ld);
1977 for (st = dna, i = 0; i < ld; i++, st++) dna1[i] = aa[*st];
1978 line1[0] = line2[0] = line3[0] = '\0'; x= a[0]; y = a[1]-1;
1980 for (len = 0, j = 2, lines = 0; j < length; j++) {
1982 /*printf("%d %d %d\n", i, len, b->j);*/
1983 if (i > 0 && i < 5) tmp[i-2] = aa[pro[x++]];
1985 i = 3; tmp[0] = tmp[1] = tmp[2] = '-';
1986 if (a[j+1] == 2) tmp[2] = ' ';
1989 strncpy(&line1[len], (const char *)&dna1[y], i); y+=i;
1990 } else {line1[len] = '-'; i = 1; tmp[0] = aa[pro[x++]];}
1991 strncpy(&line2[len], tmp, i);
1992 for (k = 0; k < i; k++) {
1993 if (tmp[k] != ' ' && tmp[k] != '-') {
1994 if (k == 2) tmp[k] = '\\';
1995 else if (k == 1) tmp[k] = '|';
1997 } else tmp[k] = ' ';
1999 if (i == 1) tmp[0] = ' ';
2000 strncpy(&line3[len], tmp, i);
2001 tmp[0] = tmp[1] = tmp[2] = ' ';
2003 line1[len] = line2[len] =line3[len] = '\0';
2005 printf("\n%5d", WIDTH*lines++);
2006 for (k = 10; k <= WIDTH; k+=10)
2008 if (k-5 < WIDTH) printf(" .");
2009 c1 = line1[WIDTH]; c2 = line2[WIDTH]; c3 = line3[WIDTH];
2010 line1[WIDTH] = line2[WIDTH] = line3[WIDTH] = '\0';
2011 printf("\n %s\n %s\n %s\n", line1, line3, line2);
2012 line1[WIDTH] = c1; line2[WIDTH] = c2; line3[WIDTH] = c3;
2013 strncpy(line1, &line1[WIDTH], sizeof(line1)-1);
2014 strncpy(line2, &line2[WIDTH], sizeof(line2)-1);
2015 strncpy(line3, &line3[WIDTH], sizeof(line3)-1);
2019 printf("\n%5d", WIDTH*lines);
2020 for (k = 10; k < len; k+=10)
2022 if (k-5 < len) printf(" .");
2023 printf("\n %s\n %s\n %s\n", line1, line3, line2);
2027 /* alignment store the operation that align the protein and dna sequence.
2028 The code of the number in the array is as follows:
2029 0: delete of an amino acid.
2030 2: frame shift, 2 nucleotides match with an amino acid
2031 3: match an amino acid with a codon
2032 4: the other type of frame shift
2033 5: delete of a codon
2036 Also the first two element of the array stores the starting point
2037 in the protein and dna sequences in the local alignment.
2039 Display looks like where WIDTH is assumed to be divisible by 10.
2041 0 . : . : . : . : . : . :
2042 CCTATGATACTGGGATACTGGAACGTCCGCGGACTGACACACCCGATCCGCATGCTCCTG
2043 P M I L G Y W N V R G L T H P I R M L L
2045 60 . : . : . : . : . : . :
2046 GAATACACAGACTCAAGCTATGATGAGAAGAGATACACCATGGGTGACGCTCCCGACTTT
2047 E Y T D S S Y D E K R Y T M G D A P D F
2051 /* fatal - print message and die */
2055 fprintf(stderr, "%s\n", msg);
2059 int do_walign (const unsigned char *aa0, int n0,
2060 const unsigned char *aa1, int n1,
2062 struct pstruct *ppst,
2063 struct f_struct *f_str,
2064 struct a_res_str *a_res,
2068 int i, last_n1, itemp, n10;
2069 int n_aa, n_nt, hoff, nt_min, nt_max, w_fact;
2070 unsigned char *fs, *fd;
2074 #ifndef TFAST /* FASTX */
2078 /* check for large differences in sequence length */
2079 nt_min = 0; nt_max = n_nt;
2080 if (n_nt > 6 * n_aa) {
2081 /* find out where the diagonal is - get hoff
2082 hoff < 0 => seq0 is in the middle of seq1
2084 do_fastx(f_str->aa0x, n0, aa1, n1, ppst, f_str, &rst, &hoff);
2085 if (rst.score[0] > 2 * rst.score[2]) {w_fact = 4;}
2088 if (hoff > n_aa) { /* hoff > 0 => seq1 is in the middle of seq0 */
2089 nt_min = max(0,(hoff-w_fact*n_aa)*3);
2090 nt_max = min((hoff+w_fact*n_aa)*3,n_nt);
2093 nt_max = min(3*w_fact*n_aa,n_nt);
2097 a_res->res = f_str->res;
2099 score = pro_dna(aa1, n1, f_str->aa0y+nt_min, nt_max-nt_min, ppst->pam2[0],
2100 #ifdef OLD_FASTA_GAP
2101 -(ppst->gdelval - ppst->ggapval),
2107 f_str->max_res, a_res);
2109 /* correct for nt_min missing residues in alignment */
2114 for (i=0; i<n1; i++) {
2115 fputc(ppst->sq[f_str->aa1x[i]],stderr);
2116 if (i%60==59) fputc('\n',stderr);
2118 fprintf(stderr,"\n-----\n");
2122 for (itx=3*frame; itx<3+3*frame; itx++) {
2123 n10 = saatran(aa1,&f_str->aa1x[last_n1],n1,itx);
2125 for (i=0; i<n10; i++) {
2126 fprintf(stderr,"%c",pst.sq[aa10[last_n1+i]]);
2127 if ((i%60)==59) fprintf(stderr,"\n");
2129 fprintf(stderr,"\n");
2135 /* create aa1y from aa1x */
2136 for (fs=f_str->aa1x,itemp=0; itemp <3; itemp++,fs++) {
2137 for (fd= &f_str->aa1y[itemp]; *fs!=EOSEQ; fd += 3, fs++) *fd = *fs;
2141 for (i=0; i<n1; i++) {
2142 fputc(ppst->sq[f_str->aa1y[i]],stderr);
2143 if (i%60==59) fputc('\n',stderr);
2145 fprintf(stderr,"\n-----\n");
2151 /* check for large differences in sequence length */
2152 nt_min = 0; nt_max = n_nt;
2153 if (n_nt > 6 * n_aa) {
2154 /* find out where the diagonal is - get hoff
2155 hoff < 0 => seq0 is in the middle of seq1
2157 do_fastx(aa0, n0, f_str->aa1x, n10, ppst, f_str, &rst, &hoff);
2158 if (rst.score[0] > 2 * rst.score[2]) {w_fact = 4;}
2161 if ( hoff > n_aa) { /* hoff > 0 => seq1 is in the middle of seq0 */
2162 nt_min = max(0,(hoff-w_fact*n_aa)*3);
2163 nt_max = min((hoff+w_fact*n_aa)*3,n_nt);
2166 nt_max = min(3*w_fact*n_aa,n_nt);
2170 a_res->res = f_str->res;
2172 score = pro_dna(aa0, n0, f_str->aa1y+nt_min, nt_max-nt_min, ppst->pam2[0],
2173 #ifdef OLD_FASTA_GAP
2174 -(ppst->gdelval - ppst->ggapval),
2180 f_str->max_res, a_res);
2184 /* pro_dna always compares protein to DNA, and returns protein
2185 coordinates in a_res->min0,max0 */
2187 a_res->min1 += nt_min;
2188 a_res->max1 += nt_min;
2190 /* display_alig(f_str->res,f_str->aa0y,aa1,*nres,n0); */
2196 /* aln_func_vals - set up aln.qlfact, qlrev, llfact, llmult, frame, llrev */
2197 /* call from calcons, calc_id, calc_code */
2199 aln_func_vals(int frame, struct a_struct *aln) {
2207 if (frame > 0) aln->qlrev = 1;
2208 else aln->qlrev = 0;
2215 if (frame > 0) aln->llrev = 1;
2216 else aln->llrev = 0;
2220 /* this function is required for programs like tfastx/y/s that do
2221 translations on DNA sequences and save them in f_str->aa1??
2225 pre_cons(const unsigned char *aa1, int n1, int frame, struct f_struct *f_str) {
2227 int i, last_n1, itemp, n10;
2228 unsigned char *fs, *fd;
2232 for (itx=3*frame; itx<3+3*frame; itx++) {
2233 n10 = saatran(aa1,&f_str->aa1x[last_n1],n1,itx);
2235 for (i=0; i<n10; i++) {
2236 fprintf(stderr,"%c",pst.sq[aa10[last_n1+i]]);
2237 if ((i%60)==59) fprintf(stderr,"\n");
2239 fprintf(stderr,"\n");
2245 /* create aa1y from aa1x */
2246 for (fs=f_str->aa1x,itemp=0; itemp <3; itemp++,fs++) {
2247 for (fd= &f_str->aa1y[itemp]; *fs!=EOSEQ; fd += 3, fs++) *fd = *fs;
2255 Alignment: store the operation that align the protein and dna sequence.
2256 The code of the number in the array is as follows:
2257 0: delete of an amino acid.
2258 2: frame shift, 2 nucleotides match with an amino acid
2259 3: match an amino acid with a codon
2260 4: the other type of frame shift
2261 5: delete of a codon
2263 The first two elements of the array stores the starting point
2264 in the protein and dna sequences in the local alignment.
2269 int calcons(const unsigned char *aa0, int n0,
2270 const unsigned char *aa1, int n1,
2272 struct a_struct *aln,
2273 struct a_res_str a_res,
2275 char *seqc0, char *seqc1, char *seqca,
2276 struct f_struct *f_str)
2279 int lenc, not_c, itmp, ngap_p, ngap_d, nfs;
2280 char *sp0, *sp1, *spa, *sq;
2281 const unsigned char *ap0, *ap1;
2284 if (pst.ext_sq_set) {sq = pst.sqx;}
2289 #ifndef TFAST /* FASTX */
2290 aln->amin1 = aln->smin1 = a_res.min0; /* prot */
2291 aln->amin0 = aln->smin0 = a_res.min1; /* DNA */
2293 ap0 = f_str->aa0y; /* translated DNA */
2294 ap1 = aa1; /* protein */
2299 aln->amin0 = aln->smin0 = a_res.min0; /* DNA */
2300 aln->amin1 = aln->smin1 = a_res.min1; /* prot */
2302 ap1 = aa0; /* protein */
2303 ap0 = f_str->aa1y; /* translated DNA */
2310 rpmax = rp+a_res.nres;
2314 lenc = not_c = aln->nident = aln->nsim = ngap_p = ngap_d = nfs= 0;
2318 while (rp < rpmax) {
2319 /* fprintf(stderr,"%d %d %d (%c) %d (%c)\n"
2320 ,(int)(rp-res),*rp,i0,sq[ap0[i0]],i1,sq[ap1[i1]]);
2323 case 0: /* aa insertion */
2325 *sp1++ = sq[ap1[i1++]];
2330 case 2: /* -1 frameshift */
2338 if ((itmp=pst.pam2[0][ap0[i0]][ap1[i1]])<0) { *spa = M_NEG; }
2339 else if (itmp == 0) { *spa = M_ZERO;}
2340 else {*spa = M_POS;}
2341 if (*spa == M_POS || *spa == M_ZERO) { aln->nsim++;}
2345 *sp1 = sq[ap1[i1++]];
2346 if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}
2347 sp0++; sp1++; spa++;
2350 case 3: /* codon/aa match */
2351 if ((itmp=pst.pam2[0][ap0[i0]][ap1[i1]])<0) { *spa = M_NEG; }
2352 else if (itmp == 0) { *spa = M_ZERO;}
2353 else {*spa = M_POS;}
2354 if (*spa == M_POS || *spa == M_ZERO) { aln->nsim++;}
2358 *sp1 = sq[ap1[i1++]];
2359 if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}
2360 sp0++; sp1++; spa++;
2363 case 4: /* +1 frameshift */
2371 if ((itmp=pst.pam2[0][ap0[i0]][ap1[i1]])<0) { *spa = M_NEG; }
2372 else if (itmp == 0) { *spa = M_ZERO;}
2373 else {*spa = M_POS;}
2374 if (*spa == M_POS || *spa == M_ZERO) { aln->nsim++;}
2378 *sp1 = sq[ap1[i1++]];
2379 if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}
2380 sp0++; sp1++; spa++;
2383 case 5: /* codon insertion */
2384 *sp0++ = sq[ap0[i0]];
2395 #ifndef TFAST /* FASTX */
2398 aln->ngap_q = ngap_d;
2399 aln->ngap_l = ngap_p;
2403 aln->amin1 = aln->smin1;
2404 aln->amin0 = aln->smin0;
2405 aln->ngap_q = ngap_p;
2406 aln->ngap_l = ngap_d;
2410 if (lenc < 0) lenc = 1;
2412 /* now we have the middle, get the right end */
2416 int calcons_a(const unsigned char *aa0, unsigned char *aa0a, int n0,
2417 const unsigned char *aa1, int n1,
2419 struct a_struct *aln,
2420 struct a_res_str a_res,
2422 char *seqc0, char *seqc0a, char *seqc1, char *seqca,
2423 char *ann_arr, struct f_struct *f_str)
2426 int lenc, not_c, itmp, ngap_p, ngap_d, nfs;
2427 char *sp0, *sp0a, *sp1, *spa, *sq;
2428 const unsigned char *ap0, *ap1;
2431 if (pst.ext_sq_set) {sq = pst.sqx;}
2434 #ifndef TFAST /* FASTX */
2435 aln->amin1 = aln->smin1 = a_res.min0; /* prot */
2436 aln->amin0 = aln->smin0 = a_res.min1; /* DNA */
2438 ap0 = f_str->aa0y; /* translated DNA */
2439 ap1 = aa1; /* protein */
2441 aln->amin0 = aln->smin0 = a_res.min0; /* DNA */
2442 aln->amin1 = aln->smin1 = a_res.min1; /* prot */
2449 rpmax = &a_res.res[a_res.nres];
2461 lenc = not_c = aln->nident = aln->nsim = ngap_p = ngap_d = nfs= 0;
2465 while (rp < rpmax) {
2466 /* fprintf(stderr,"%d %d %d (%c) %d (%c)\n"
2467 ,(int)(rp-res),*rp,i0,sq[ap0[i0]],i1,sq[ap1[i1]]);
2470 case 0: /* aa insertion */
2472 *sp1++ = sq[ap1[i1++]];
2478 case 2: /* -1 frameshift */
2487 if ((itmp=pst.pam2[0][ap0[i0]][ap1[i1]])<0) { *spa = M_NEG; }
2488 else if (itmp == 0) { *spa = M_ZERO;}
2489 else {*spa = M_POS;}
2490 if (*spa == M_POS || *spa == M_ZERO) { aln->nsim++;}
2495 *sp0a++ = ann_arr[aa0a[i1]];
2499 *sp1 = sq[ap1[i1++]];
2500 if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}
2501 sp0++; sp1++; spa++;
2504 case 3: /* codon/aa match */
2505 if ((itmp=pst.pam2[0][ap0[i0]][ap1[i1]])<0) { *spa = M_NEG; }
2506 else if (itmp == 0) { *spa = M_ZERO;}
2507 else {*spa = M_POS;}
2508 if (*spa == M_POS || *spa == M_ZERO) { aln->nsim++;}
2513 *sp0a++ = ann_arr[aa0a[i1]];
2517 *sp1 = sq[ap1[i1++]];
2518 if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}
2519 sp0++; sp1++; spa++;
2522 case 4: /* +1 frameshift */
2531 if ((itmp=pst.pam2[0][ap0[i0]][ap1[i1]])<0) { *spa = M_NEG; }
2532 else if (itmp == 0) { *spa = M_ZERO;}
2533 else {*spa = M_POS;}
2534 if (*spa == M_POS || *spa == M_ZERO) { aln->nsim++;}
2538 *sp1 = sq[ap1[i1++]];
2539 if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}
2540 sp0++; sp1++; spa++;
2543 case 5: /* codon insertion */
2545 *sp0++ = sq[ap0[i0]];
2554 *sp0a = *spa = '\0';
2559 aln->ngap_q = ngap_d;
2560 aln->ngap_l = ngap_p;
2564 aln->ngap_q = ngap_p;
2565 aln->ngap_l = ngap_d;
2569 if (lenc < 0) lenc = 1;
2571 /* now we have the middle, get the right end */
2575 /* build an array of match/ins/del - length strings */
2576 int calc_code(const unsigned char *aa0, int n0,
2577 const unsigned char *aa1, int n1,
2578 struct a_struct *aln,
2579 struct a_res_str a_res,
2581 char *al_str, int al_str_n, struct f_struct *f_str)
2584 int lenc, not_c, itmp, ngap_p, ngap_d, nfs;
2588 const unsigned char *ap0, *ap1;
2591 if (pst.ext_sq_set) {sq = pst.sqx;}
2595 #ifndef TFAST /* FASTX */
2596 strncpy(op_char,"- /=\\+*",sizeof(op_char));
2597 aln->amin1 = aln->smin1 = a_res.min0; /* prot */
2598 aln->amin0 = aln->smin0 = a_res.min1; /* DNA */
2603 strncpy(op_char,"+ /=\\-*",sizeof(op_char));
2604 aln->amin0 = aln->smin0 = a_res.min0; /* DNA */
2605 aln->amin1 = aln->smin1 = a_res.min1; /* prot */
2612 rpmax = &a_res.res[a_res.nres];
2614 op_cnt = lenc = not_c = aln->nident = aln->nsim = ngap_p = ngap_d = nfs = 0;
2615 op = 3; /* code for a match - all alignments start with a match */
2620 while (rp < rpmax) {
2622 case 0: /* aa insertion */
2623 if (op == 0) op_cnt++;
2625 update_code(al_str, al_str_n-strlen(al_str),op, op_cnt,op_char);
2632 case 2: /* -1 frameshift */
2633 if (pst.pam2[0][ap0[i0]][ap1[i1]]>=0) { aln->nsim++;}
2635 update_code(al_str, al_str_n-strlen(al_str),op, op_cnt,op_char);
2637 update_code(al_str, al_str_n-strlen(al_str),op, op_cnt,op_char);
2644 sp1 = sq[ap1[i1++]];
2645 if (toupper(sp0) == toupper(sp1)) aln->nident++;
2648 case 3: /* codon/aa match */
2649 if (pst.pam2[0][ap0[i0]][ap1[i1]]>=0) { aln->nsim++;}
2652 sp1 = sq[ap1[i1++]];
2653 if (toupper(sp0) == toupper(sp1)) aln->nident++;
2655 if (op == 3 || op == 6) {
2656 if (sp0 != '*' && sp1 != '*') {
2658 update_code(al_str, al_str_n-strlen(al_str),op, op_cnt,op_char);
2664 update_code(al_str, al_str_n-strlen(al_str),op, op_cnt,op_char);
2669 update_code(al_str, al_str_n-strlen(al_str),op, op_cnt,op_char);
2670 if (op == 2 || op == 4) op_cnt = 2;
2676 case 4: /* +1 frameshift */
2677 update_code(al_str, al_str_n-strlen(al_str),op, op_cnt,op_char);
2679 update_code(al_str, al_str_n-strlen(al_str),op, op_cnt,op_char);
2685 if (pst.pam2[0][ap0[i0]][ap1[i1]]>=0) { aln->nsim++;}
2688 sp1 = sq[ap1[i1++]];
2689 if (toupper(sp0) == toupper(sp1)) aln->nident++;
2692 case 5: /* codon insertion */
2693 if (op == 5) op_cnt++;
2695 update_code(al_str, al_str_n-strlen(al_str),op, op_cnt,op_char);
2704 update_code(al_str, al_str_n-strlen(al_str),op, op_cnt,op_char);
2709 aln->ngap_q = ngap_d;
2710 aln->ngap_l = ngap_p;
2714 aln->ngap_q = ngap_p;
2715 aln->ngap_l = ngap_d;
2719 if (lenc < 0) lenc = 1;
2725 update_code(char *al_str, int al_str_max, int op, int op_cnt, char *op_char) {
2729 sprintf(tmp_cnt,"%c%d",op_char[op],op_cnt);
2730 strncat(al_str,tmp_cnt,al_str_max-1);
2731 al_str[al_str_max-1]='\0';
2734 int calc_id(const unsigned char *aa0, int n0,
2735 const unsigned char *aa1, int n1,
2736 struct a_struct *aln,
2737 struct a_res_str a_res,
2739 struct f_struct *f_str)
2742 int lenc, not_c, itmp, ngap_p, ngap_d, nfs;
2744 const unsigned char *ap0, *ap1;
2747 if (pst.ext_sq_set) {sq = pst.sqx;}
2751 #ifndef TFAST /* FASTX */
2752 aln->amin1 = aln->smin1 = a_res.min0; /* prot */
2753 aln->amin0 = aln->smin0 = a_res.min1; /* DNA */
2758 aln->amin0 = aln->smin0 = a_res.min0; /* DNA */
2759 aln->amin1 = aln->smin1 = a_res.min1; /* prot */
2766 rpmax = &a_res.res[a_res.nres];
2768 lenc = not_c = aln->nident = aln->nsim = ngap_p = ngap_d = nfs = 0;
2772 while (rp < rpmax) {
2773 /* fprintf(stderr,"%d %d %d (%c) %d (%c)\n"
2774 ,(int)(rp-res),*rp,i0,sq[ap0[i0]],i1,sq[ap1[i1]]);
2777 case 0: /* aa insertion */
2782 case 2: /* -1 frameshift */
2786 if (pst.pam2[0][ap0[i0]][ap1[i1]]>=0) { aln->nsim++;}
2789 sp1 = sq[ap1[i1++]];
2790 if (toupper(sp0) == toupper(sp1)) aln->nident++;
2793 case 3: /* codon/aa match */
2794 if (pst.pam2[0][ap0[i0]][ap1[i1]]>=0) { aln->nsim++;}
2797 sp1 = sq[ap1[i1++]];
2798 if (toupper(sp0) == toupper(sp1)) aln->nident++;
2801 case 4: /* +1 frameshift */
2805 if (pst.pam2[0][ap0[i0]][ap1[i1]]>=0) { aln->nsim++;}
2808 sp1 = sq[ap1[i1++]];
2809 if (toupper(sp0) == toupper(sp1)) aln->nident++;
2812 case 5: /* codon insertion */
2823 aln->ngap_q = ngap_d;
2824 aln->ngap_l = ngap_p;
2828 aln->ngap_q = ngap_p;
2829 aln->ngap_l = ngap_d;
2833 if (lenc < 0) lenc = 1;
2834 /* now we have the middle, get the right end */
2841 update_params(struct qmng_str *qm_msg, struct pstruct *ppst)
2843 ppst->n0 = qm_msg->n0;