/* copyright (c) 1998, 1999 William R. Pearson and the U. of Virginia */ /* $Name: fa_34_26_5 $ - $Id: dropnfa.c,v 1.81 2007/04/26 18:37:19 wrp Exp $ */ /* 18-Sep-2006 - removed global variables for alignment from nw_align and bg_align */ /* 18-Oct-2005 - converted to use a_res and aln for alignment coordinates */ /* 14-May-2003 - modified to return alignment start at 0, rather than 1, for begin:end alignments */ /* implements the fasta algorithm, see: W. R. Pearson, D. J. Lipman (1988) "Improved tools for biological sequence comparison" Proc. Natl. Acad. Sci. USA 85:2444-2448 This version uses Smith-Waterman for final protein alignments W. R. Pearson (1996) "Effective protein sequence comparison" Methods Enzymol. 266:227-258 26-April-2001 - -DGAP_OPEN redefines -f, as gap open penalty 4-Nov-2001 - modify spam() while(1). */ #include #include #include #include #include #include "defs.h" #include "param.h" /* this must be consistent with upam.h */ #define MAXHASH 32 #define NMAP MAXHASH+1 /* globals for fasta */ #define MAXWINDOW 64 #ifndef MAXSAV #define MAXSAV 10 #endif #ifndef ALLOCN0 static char *verstr="3.5 Sept 2006"; #else static char *verstr="3.5an0 Sept 2006"; #endif extern void w_abort(char *, char *); int shscore(const unsigned char *aa0, int n0, int **pam2); extern void init_karlin(const unsigned char *aa0, int n0, struct pstruct *ppst, double *aa0_f, double **kp); extern void init_karlin_a(struct pstruct *, double *, double **); extern int do_karlin(const unsigned char *, int n1, int **, struct pstruct *, double *, double *, double *, double *); extern void aancpy(char *to, char *from, int count, struct pstruct pst); char *ckalloc(size_t); #ifdef TFASTA extern int aatran(const unsigned char *ntseq, unsigned char *aaseq, int maxs, int frame); #endif #include "dropnfa.h" #define DROP_INTERN #include "drop_func.h" struct swstr { int H, E;}; static int dmatch (const unsigned char *aa0, int n0, const unsigned char *aa1, int n1, int hoff, int window, int **pam2, int gdelval, int ggapval, struct f_struct *f_str); /* initialize for fasta */ void init_work (unsigned char *aa0, int n0, struct pstruct *ppst, struct f_struct **f_arg) { int mhv, phv; int hmax; int i0, hv; int pamfact; int btemp; struct f_struct *f_str; /* these used to be globals, but do not need to be */ int ktup; /* word size examined */ int fact; /* factor used to scale ktup match value */ int kt1; /* ktup-1 */ int lkt; /* last ktup - initiall kt1, but can be increased for hsq >= NMAP */ int maxn0; /* used in band alignment */ int *pwaa; /* pam[aa0[]] profile */ int i, j; struct swstr *ss; int *waa; int nsq, ip, *hsq; if (ppst->ext_sq_set) { nsq = ppst->nsqx; ip = 1; hsq = ppst->hsqx; } else { nsq = ppst->nsq; ip = 0; hsq = ppst->hsq; } f_str = (struct f_struct *)calloc(1,sizeof(struct f_struct)); #ifndef TFASTA if((ppst->zsflag%10) == 6) { f_str->kar_p = NULL; init_karlin(aa0, n0, ppst, &f_str->aa0_f[0], &f_str->kar_p); } #endif btemp = 2 * ppst->param_u.fa.bestoff / 3 + n0 / ppst->param_u.fa.bestscale + ppst->param_u.fa.bkfact * (ppst->param_u.fa.bktup - ppst->param_u.fa.ktup); if (ppst->nt_align) btemp = (btemp*ppst->pam_h)/5; /* normalize to standard +5/-4 */ btemp = min (btemp, ppst->param_u.fa.bestmax); if (btemp > 3 * n0) btemp = 3 * shscore(aa0,n0,ppst->pam2[0]) / 5; ppst->param_u.fa.cgap = btemp + ppst->param_u.fa.bestoff / 3; if (ppst->param_u.fa.optcut_set != 1) #ifndef TFASTA ppst->param_u.fa.optcut = btemp; #else ppst->param_u.fa.optcut = (btemp*3)/2; #endif #ifndef OLD_FASTA_GAP ppst->param_u.fa.pgap = ppst->gdelval + 2*ppst->ggapval; #else ppst->param_u.fa.pgap = ppst->gdelval + ppst->ggapval; #endif pamfact = ppst->param_u.fa.pamfact; ktup = ppst->param_u.fa.ktup; fact = ppst->param_u.fa.scfact * ktup; if (pamfact == -1) pamfact = 0; else if (pamfact == -2) pamfact = 1; for (i0 = 1, mhv = -1; i0 <= ppst->nsq; i0++) if (hsq[i0] < NMAP && hsq[i0] > mhv) mhv = hsq[i0]; if (mhv <= 0) { fprintf (stderr, " maximum hsq <=0 %d\n", mhv); exit (1); } for (f_str->kshft = 0; mhv > 0; mhv /= 2) f_str->kshft++; /* kshft = 2; */ kt1 = ktup - 1; hv = 1; for (i0 = 0; i0 < ktup; i0++) hv = hv << f_str->kshft; hmax = hv; f_str->hmask = (hmax >> f_str->kshft) - 1; if ((f_str->harr = (int *) calloc (hmax, sizeof (int))) == NULL) { fprintf (stderr, " cannot allocate hash array: hmax: %d hmask: %d\n", hmax, f_str->hmask); exit (1); } if ((f_str->pamh1 = (int *) calloc (nsq+1, sizeof (int))) == NULL) { fprintf (stderr, " cannot allocate pamh1 array nsq=%d\n",nsq); exit (1); } if ((f_str->pamh2 = (int *) calloc (hmax, sizeof (int))) == NULL) { fprintf (stderr, " cannot allocate pamh2 array hmax=%d\n",hmax); exit (1); } if ((f_str->link = (int *) calloc (n0, sizeof (int))) == NULL) { fprintf (stderr, " cannot allocate hash link array n0=%d",n0); exit (1); } for (i0 = 0; i0 < hmax; i0++) f_str->harr[i0] = -1; for (i0 = 0; i0 < n0; i0++) f_str->link[i0] = -1; /* encode the aa0 array */ phv = hv = 0; lkt = kt1; /* restart hv, phv calculation */ for (i0 = 0; i0 < min(lkt,n0); i0++) { if (hsq[aa0[i0]] >= NMAP) {hv=phv=0; lkt = i0+ ktup; continue;} hv = (hv << f_str->kshft) + hsq[aa0[i0]]; phv += ppst->pam2[ip][aa0[i0]][aa0[i0]]*ktup; } for (; i0 < n0; i0++) { if (hsq[aa0[i0]] >= NMAP) { hv=phv=0; /* restart hv, phv calculation */ for (lkt = i0+kt1; (i0 < lkt || hsq[aa0[i0]]>=NMAP) && i0= NMAP) { hv=phv=0; lkt = i0+ktup; continue; } hv = (hv << f_str->kshft) + hsq[aa0[i0]]; phv += ppst->pam2[ip][aa0[i0]][aa0[i0]]*ktup; } } if (i0 >= n0) break; hv = ((hv & f_str->hmask) << f_str->kshft) + hsq[aa0[i0]]; f_str->link[i0] = f_str->harr[hv]; f_str->harr[hv] = i0; if (pamfact) { f_str->pamh2[hv] = (phv += ppst->pam2[ip][aa0[i0]][aa0[i0]] * ktup); /* this check should always be true, but just in case */ if (hsq[aa0[i0-kt1]]pam2[ip][aa0[i0 - kt1]][aa0[i0 - kt1]] * ktup; } else f_str->pamh2[hv] = fact * ktup; } /* this has been modified from 0..nsq to 1..<=ppst->nsq because the pam2[0][0] is now undefined for consistency with blast */ if (pamfact) for (i0 = 1; i0 <= nsq; i0++) f_str->pamh1[i0] = ppst->pam2[ip][i0][i0] * ktup; else for (i0 = 1; i0 <= nsq; i0++) f_str->pamh1[i0] = fact; f_str->ndo = 0; f_str->noff = n0-1; #ifndef ALLOCN0 if ((f_str->diag = (struct dstruct *) calloc ((size_t)MAXDIAG, sizeof (struct dstruct)))==NULL) { fprintf (stderr," cannot allocate diagonal arrays: %lu\n", MAXDIAG *sizeof (struct dstruct)); exit (1); }; #else if ((f_str->diag = (struct dstruct *) calloc ((size_t)n0, sizeof (struct dstruct)))==NULL) { fprintf (stderr," cannot allocate diagonal arrays: %ld\n", (long)n0*sizeof (struct dstruct)); exit (1); }; #endif #ifdef TFASTA if ((f_str->aa1x =(unsigned char *)calloc((size_t)ppst->maxlen+2, sizeof(unsigned char))) == NULL) { fprintf (stderr, "cannot allocate aa1x array %d\n", ppst->maxlen+2); exit (1); } f_str->aa1x++; #endif f_str->bss = (struct bdstr *) calloc((size_t)ppst->param_u.fa.optwid*2+4, sizeof(struct bdstr)); f_str->bss++; /* allocate space for the scoring arrays */ maxn0 = n0 + 4; if ((ss = (struct swstr *) calloc (maxn0, sizeof (struct swstr))) == NULL) { fprintf (stderr, "cannot allocate ss array %3d\n", n0); exit (1); } ss++; f_str->ss = ss; /* initialize the "variable" pam array */ if ((waa= (int *)calloc ((size_t)(nsq+1)*n0,sizeof(int))) == NULL) { fprintf(stderr,"cannot allocate waa struct %3d\n",nsq*n0); exit(1); } pwaa = waa; for (i=0; i<=nsq; i++) { for (j=0;jpam2[ip][aa0[j]][i]; pwaa++; } } f_str->waa0 = waa; /* initialize the "conventional" pam array used for alignments */ if ((waa= (int *)calloc ((size_t)(nsq+1)*n0,sizeof(int))) == NULL) { fprintf(stderr,"cannot allocate waa struct %3d\n",nsq*n0); exit(1); } pwaa = waa; for (i=0; i<=nsq; i++) { for (j=0;jpam2[0][aa0[j]][i]; pwaa++; } } f_str->waa1 = waa; f_str->max_res = max(3*n0/2,MIN_RES); /* now we need alignment storage - get it */ if ((f_str->res = (int *)calloc((size_t)f_str->max_res,sizeof(int)))==NULL) { fprintf(stderr,"cannot allocate alignment results array %d\n",f_str->max_res); exit(1); } *f_arg = f_str; } /* pstring1 is a message to the manager, currently 512 */ /* pstring2 is the same information, but in a markx==10 format */ void get_param (struct pstruct *pstr, char *pstring1, char *pstring2) { #ifndef TFASTA char *pg_str="FASTA"; #else char *pg_str="TFASTA"; #endif if (!pstr->param_u.fa.optflag) #ifdef OLD_FASTA_GAP sprintf (pstring1, "%s (%s) function [%s matrix, (%d:%d)%s] ktup: %d\n join: %d, gap-pen: %d/%d, width: %3d", #else sprintf (pstring1, "%s (%s) function [%s matrix, (%d:%d)%s] ktup: %d\n join: %d, open/ext: %d/%d, width: %3d", #endif pg_str,verstr,pstr->pamfile, pstr->pam_h,pstr->pam_l, (pstr->ext_sq_set) ? "xS":"\0", pstr->param_u.fa.ktup, pstr->param_u.fa.cgap, pstr->gdelval, pstr->ggapval, pstr->param_u.fa.optwid); else #ifdef OLD_FASTA_GAP sprintf (pstring1, "%s (%s) function [optimized, %s matrix (%d:%d)%s] ktup: %d\n join: %d, opt: %d, gap-pen: %d/%d, width: %3d", #else sprintf (pstring1, "%s (%s) function [optimized, %s matrix (%d:%d)%s] ktup: %d\n join: %d, opt: %d, open/ext: %d/%d, width: %3d", #endif pg_str,verstr,pstr->pamfile, pstr->pam_h,pstr->pam_l, (pstr->ext_sq_set) ? "xS":"\0", pstr->param_u.fa.ktup, pstr->param_u.fa.cgap, pstr->param_u.fa.optcut, pstr->gdelval, pstr->ggapval, pstr->param_u.fa.optwid); if (pstr->param_u.fa.iniflag) strcat(pstring1," init1"); /* if (pstr->zsflag==0) strcat(pstring1," not-scaled"); else if (pstr->zsflag==1) strcat(pstring1," reg.-scaled"); */ if (pstring2 != NULL) { #ifdef OLD_FASTA_GAP sprintf (pstring2, "; pg_name: %s\n; pg_ver: %s\n; pg_matrix: %s (%d:%d)\n\ ; pg_gap-pen: %d %d\n; pg_ktup: %d\n; pg_optcut: %d\n; pg_cgap: %d\n", #else sprintf (pstring2, "; pg_name: %s\n; pg_ver: %s\n; pg_matrix: %s (%d:%d)\n\ ; pg_open-ext: %d %d\n; pg_ktup: %d\n; pg_optcut: %d\n; pg_cgap: %d\n", #endif pg_str,verstr,pstr->pamfile, pstr->pam_h,pstr->pam_l, pstr->gdelval, pstr->ggapval,pstr->param_u.fa.ktup,pstr->param_u.fa.optcut, pstr->param_u.fa.cgap); } } void close_work (const unsigned char *aa0, int n0, struct pstruct *ppst, struct f_struct **f_arg) { struct f_struct *f_str; f_str = *f_arg; if (f_str != NULL) { if (f_str->kar_p!=NULL) free(f_str->kar_p); f_str->ss--; f_str->bss--; free(f_str->res); free(f_str->waa1); free(f_str->waa0); free(f_str->ss); free(f_str->bss); free(f_str->diag); free(f_str->link); free(f_str->pamh2); free(f_str->pamh1); free(f_str->harr); free(f_str); *f_arg = NULL; } } #ifdef ALLOCN0 void savemax (struct dstruct *, int, struct f_struct *); #else void savemax (struct dstruct *, struct f_struct *); #endif int spam (const unsigned char *, const unsigned char *, struct savestr *, int **, int, int, int); int sconn(struct savestr **, int nsave, int cgap, int pgap, int noff); void kpsort(struct savestr **, int); static int ALIGN(const unsigned char *, const unsigned char *, int, int, int **, int, int, int *, int *, struct f_struct *); static int LOCAL_ALIGN(const unsigned char *, const unsigned char *, int, int, int, int, int **, int, int, int *, int *, int *, int *, int, struct f_struct *); static int B_ALIGN(const unsigned char *A, const unsigned char *B, int M, int N, int low, int up, int **W, int G, int H, int *S, int *nS, int MW, int MX, struct bdstr *bss); static void do_fasta (const unsigned char *aa0, int n0, const unsigned char *aa1, int n1, struct pstruct *ppst, struct f_struct *f_str, struct rstruct *rst, int *hoff) { int nd; /* diagonal array size */ int lhval; int kfact; register struct dstruct *dptr; register int tscor; #ifndef ALLOCN0 register struct dstruct *diagp; #else register int dpos; int lposn0; #endif int noff; struct dstruct *dpmax; register int lpos; int tpos; struct savestr *vmptr; int scor, ib, nsave; int xdrop, do_extend; int ktup, kt1, lkt, *hsq, ip; if (ppst->ext_sq_set) { ip = 1; hsq = ppst->hsqx; } else { ip = 0; hsq = ppst->hsq; } xdrop = -ppst->pam_l; /* do extended alignment in spam iff protein or short sequences */ do_extend = !ppst->nt_align || (n0 < 50) || (n1 < 50); ktup = ppst->param_u.fa.ktup; kt1 = ktup-1; if (n1 < ktup) { rst->score[0] = rst->score[1] = rst->score[2] = 0; return; } if (n0+n1+1 >= MAXDIAG) { fprintf(stderr,"n0,n1 too large: %d, %d\n",n0,n1); rst->score[0] = rst->score[1] = rst->score[2] = -1; return; } #ifdef ALLOCN0 nd = n0; #else nd = n0 + n1; #endif dpmax = &f_str->diag[nd]; for (dptr = &f_str->diag[f_str->ndo]; dptr < dpmax;) { dptr->stop = -1; dptr->dmax = NULL; dptr++->score = 0; } for (vmptr = f_str->vmax; vmptr < &f_str->vmax[MAXSAV]; vmptr++) vmptr->score = 0; f_str->lowmax = f_str->vmax; f_str->lowscor = 0; /* start hashing */ lhval = 0; lkt = kt1; for (lpos = 0; (lpos < lkt || hsq[aa1[lpos]]>=NMAP) && lpos =NMAP) { lhval = 0; lkt = lpos + ktup; continue; } lhval = ((lhval & f_str->hmask) << f_str->kshft) + hsq[aa1[lpos]]; } noff = f_str->noff; #ifndef ALLOCN0 diagp = &f_str->diag[noff + lkt]; for (; lpos < n1; lpos++, diagp++) { if (hsq[aa1[lpos]]>=NMAP) { lpos++ ; diagp++; while (lpos < n1 && hsq[aa1[lpos]]>=NMAP) {lpos++; diagp++;} if (lpos >= n1) break; lhval = 0; } lhval = ((lhval & f_str->hmask) << f_str->kshft) + hsq[aa1[lpos]]; for (tpos = f_str->harr[lhval]; tpos >= 0; tpos = f_str->link[tpos]) { if ((tscor = (dptr = &diagp[-tpos])->stop) >= 0) { #else lposn0 = noff + lpos; for (; lpos < n1; lpos++, lposn0++) { if (hsq[aa1[lpos]]>=NMAP) {lhval = 0; goto loopl;} /* if (hsq[aa1[lpos]]>=NMAP) { lpos++; lposn0++; while (lpos < n1 && hsq[aa1[lpos]]>=NMAP) {lpos++; lposn0++;} } */ lhval = ((lhval & f_str->hmask) << f_str->kshft) + hsq[aa1[lpos]]; for (tpos = f_str->harr[lhval]; tpos >= 0; tpos = f_str->link[tpos]) { dpos = lposn0 - tpos; if ((tscor = (dptr = &f_str->diag[dpos % nd])->stop) >= 0) { #endif tscor += ktup; if ((tscor -= lpos) <= 0) { scor = dptr->score; if ((tscor += (kfact = f_str->pamh2[lhval])) < 0 && f_str->lowscor < scor) #ifdef ALLOCN0 savemax (dptr, dpos, f_str); #else savemax (dptr, f_str); #endif if ((tscor += scor) >= kfact) { dptr->score = tscor; dptr->stop = lpos; } else { dptr->score = kfact; dptr->start = (dptr->stop = lpos) - kt1; } } else { dptr->score += f_str->pamh1[aa0[tpos]]; dptr->stop = lpos; } } else { dptr->score = f_str->pamh2[lhval]; dptr->start = (dptr->stop = lpos) - kt1; } } /* end tpos */ #ifdef ALLOCN0 /* reinitialize diag structure */ loopl: if ((dptr = &f_str->diag[lpos % nd])->score > f_str->lowscor) savemax (dptr, lpos, f_str); dptr->stop = -1; dptr->dmax = NULL; dptr->score = 0; #endif } /* end lpos */ #ifdef ALLOCN0 for (tpos = 0, dpos = noff + n1 - 1; tpos < n0; tpos++, dpos--) { if ((dptr = &f_str->diag[dpos % nd])->score > f_str->lowscor) savemax (dptr, dpos, f_str); } #else for (dptr = f_str->diag; dptr < dpmax;) { if (dptr->score > f_str->lowscor) savemax (dptr, f_str); dptr->stop = -1; dptr->dmax = NULL; dptr++->score = 0; } f_str->ndo = nd; #endif /* at this point all of the elements of aa1[lpos] have been searched for elements of aa0[tpos] with the results in diag[dpos] */ for (nsave = 0, vmptr = f_str->vmax; vmptr < &f_str->vmax[MAXSAV]; vmptr++) { /* fprintf(stderr,"0: %4d-%4d 1: %4d-%4d dp: %d score: %d\n", noff+vmptr->start-vmptr->dp, noff+vmptr->stop-vmptr->dp, vmptr->start,vmptr->stop, vmptr->dp,vmptr->score); */ if (vmptr->score > 0) { vmptr->score = spam (aa0, aa1, vmptr, ppst->pam2[ip], xdrop, noff,do_extend); f_str->vptr[nsave++] = vmptr; } } if (nsave <= 0) { rst->score[0] = rst->score[1] = rst->score[2] = 0; return; } /* fprintf(stderr,"n0: %d; n1: %d; noff: %d\n",n0,n1,noff); for (ib=0; ibvptr[ib]->start-f_str->vptr[ib]->dp, noff+f_str->vptr[ib]->stop-f_str->vptr[ib]->dp, f_str->vptr[ib]->start,f_str->vptr[ib]->stop, f_str->vptr[ib]->dp,f_str->vptr[ib]->score); } fprintf(stderr,"---\n"); */ scor = sconn (f_str->vptr, nsave, ppst->param_u.fa.cgap, ppst->param_u.fa.pgap, noff); for (vmptr=f_str->vptr[0],ib=1; ibvptr[ib]->score > vmptr->score) vmptr=f_str->vptr[ib]; /* kssort (f_str->vptr, nsave); */ rst->score[1] = vmptr->score; rst->score[0] = max (scor, vmptr->score); rst->score[2] = rst->score[0]; /* initn */ if (ppst->param_u.fa.optflag) { if (rst->score[0] > ppst->param_u.fa.optcut) rst->score[2] = dmatch (aa0, n0, aa1, n1, *hoff=noff - vmptr->dp, ppst->param_u.fa.optwid, ppst->pam2[ip], ppst->gdelval,ppst->ggapval,f_str); } } void do_work (const unsigned char *aa0, int n0, const unsigned char *aa1, int n1, int frame, struct pstruct *ppst, struct f_struct *f_str, int qr_flg, struct rstruct *rst) { int hoff, n10; double lambda, H; rst->score[0] = rst->score[1] = rst->score[2] = 0; rst->escore = 1.0; rst->segnum = rst->seglen = 1; if (n1 < ppst->param_u.fa.ktup) return; #ifdef TFASTA n10=aatran(aa1,f_str->aa1x,n1,frame); do_fasta (aa0, n0, f_str->aa1x, n10, ppst, f_str, rst, &hoff); #else /* FASTA */ do_fasta (aa0, n0, aa1, n1, ppst, f_str, rst, &hoff); #endif #ifndef TFASTA if((ppst->zsflag%10) == 6 && (do_karlin(aa1, n1, ppst->pam2[0], ppst,f_str->aa0_f, f_str->kar_p, &lambda, &H)>0)) { rst->comp = 1.0/lambda; rst->H = H; } else {rst->comp = rst->H = -1.0;} #else rst->comp = rst->H = -1.0; #endif } void do_opt (const unsigned char *aa0, int n0, const unsigned char *aa1, int n1, int frame, struct pstruct *ppst, struct f_struct *f_str, struct rstruct *rst) { int optflag, tscore, hoff, n10; optflag = ppst->param_u.fa.optflag; ppst->param_u.fa.optflag = 1; #ifdef TFASTA n10=aatran(aa1,f_str->aa1x,n1,frame); do_fasta (aa0, n0, f_str->aa1x, n10, ppst, f_str, rst, &hoff); #else /* FASTA */ do_fasta(aa0,n0,aa1,n1,ppst,f_str,rst, &hoff); #endif ppst->param_u.fa.optflag = optflag; } #ifdef ALLOCN0 void savemax (dptr, dpos, f_str) register struct dstruct *dptr; int dpos; struct f_struct *f_str; { register struct savestr *vmptr; register int i; #else void savemax (dptr, f_str) register struct dstruct *dptr; struct f_struct *f_str; { register int dpos; register struct savestr *vmptr; register int i; dpos = (int) (dptr - f_str->diag); #endif /* check to see if this is the continuation of a run that is already saved */ if ((vmptr = dptr->dmax) != NULL && vmptr->dp == dpos && vmptr->start == dptr->start) { vmptr->stop = dptr->stop; if ((i = dptr->score) <= vmptr->score) return; vmptr->score = i; if (vmptr != f_str->lowmax) return; } else { i = f_str->lowmax->score = dptr->score; f_str->lowmax->dp = dpos; f_str->lowmax->start = dptr->start; f_str->lowmax->stop = dptr->stop; dptr->dmax = f_str->lowmax; } for (vmptr = f_str->vmax; vmptr < &f_str->vmax[MAXSAV]; vmptr++) if (vmptr->score < i) { i = vmptr->score; f_str->lowmax = vmptr; } f_str->lowscor = i; } int spam (const unsigned char *aa0, const unsigned char *aa1, struct savestr *dmax, int **pam2, int xdrop, int noff, int do_extend) { register int lpos, tot; register const unsigned char *aa0p, *aa1p; int drop_thresh; struct { int start, stop, score; } curv, maxv; aa1p = &aa1[lpos= dmax->start]; /* get the start of lib seq */ aa0p = &aa0[lpos - dmax->dp + noff]; /* start of query */ #ifdef DEBUG /* also add check in calling routine */ if (aa0p < aa0) { return -99; } #endif curv.start = lpos; /* start index in lib seq */ tot = curv.score = maxv.score = 0; for (; lpos <= dmax->stop; lpos++) { tot += pam2[*aa0p++][*aa1p++]; if (tot > curv.score) { /* update current score */ curv.stop = lpos; curv.score = tot; } else if (tot < 0) { if (curv.score > maxv.score) { /* save score, start, stop */ maxv.start = curv.start; maxv.stop = curv.stop; maxv.score = curv.score; } tot = curv.score = 0; /* reset running score */ curv.start = lpos+1; /* reset start */ if(lpos >= dmax->stop) break; /* if the zero is beyond stop, quit */ } } if (curv.score > maxv.score) { maxv.start = curv.start; maxv.stop = curv.stop; maxv.score = curv.score; } #ifndef NOSPAM_EXT /* now check to see if the score gets better by extending */ if (do_extend && maxv.score > xdrop) { if (maxv.stop == dmax->stop) { tot = maxv.score; drop_thresh = maxv.score - xdrop; aa1p = &aa1[lpos= dmax->stop]; aa0p = &aa0[lpos - dmax->dp + noff]; while (tot > drop_thresh ) { ++lpos; tot += pam2[*(++aa0p)][*(++aa1p)]; if (tot > maxv.score) { maxv.start = lpos; maxv.score = tot; drop_thresh = tot - xdrop; } } } /* scan backwards now */ if (maxv.start == dmax->start) { tot = maxv.score; drop_thresh = maxv.score - xdrop; aa1p = &aa1[lpos= dmax->start]; aa0p = &aa0[lpos - dmax->dp + noff]; while (tot > drop_thresh) { --lpos; tot += pam2[*(--aa0p)][*(--aa1p)]; if (tot > maxv.score) { maxv.start = lpos; maxv.score = tot; drop_thresh = tot - xdrop; } } } } #endif /* if (maxv.start != dmax->start || maxv.stop != dmax->stop) printf(" new region: %3d %3d %3d %3d\n",maxv.start, dmax->start,maxv.stop,dmax->stop); */ dmax->start = maxv.start; dmax->stop = maxv.stop; return maxv.score; } int sconn (struct savestr **v, int n, int cgap, int pgap, int noff) { int i, si; struct slink { int score; struct savestr *vp; struct slink *next; } *start, *sl, *sj, *so, sarr[MAXSAV]; int lstart, tstart, plstop, ptstop; /* sort the score left to right in lib pos */ kpsort (v, n); start = NULL; /* for the remaining runs, see if they fit */ for (i = 0, si = 0; i < n; i++) { /* if the score is less than the gap penalty, it never helps */ if (v[i]->score < cgap) continue; lstart = v[i]->start; tstart = lstart - v[i]->dp + noff; /* put the run in the group */ sarr[si].vp = v[i]; sarr[si].score = v[i]->score; sarr[si].next = NULL; /* if it fits, then increase the score */ for (sl = start; sl != NULL; sl = sl->next) { plstop = sl->vp->stop; ptstop = plstop - sl->vp->dp + noff; if (plstop < lstart && ptstop < tstart) { sarr[si].score = sl->score + v[i]->score + pgap; break; } } /* now recalculate where the score fits */ if (start == NULL) start = &sarr[si]; else for (sj = start, so = NULL; sj != NULL; sj = sj->next) { if (sarr[si].score > sj->score) { sarr[si].next = sj; if (so != NULL) so->next = &sarr[si]; else start = &sarr[si]; break; } so = sj; } si++; } if (start != NULL) return (start->score); else return (0); } void kssort (v, n) struct savestr *v[]; int n; { int gap, i, j; struct savestr *tmp; for (gap = n / 2; gap > 0; gap /= 2) for (i = gap; i < n; i++) for (j = i - gap; j >= 0; j -= gap) { if (v[j]->score >= v[j + gap]->score) break; tmp = v[j]; v[j] = v[j + gap]; v[j + gap] = tmp; } } void kpsort (v, n) struct savestr *v[]; int n; { int gap, i, j; struct savestr *tmp; for (gap = n / 2; gap > 0; gap /= 2) for (i = gap; i < n; i++) for (j = i - gap; j >= 0; j -= gap) { if (v[j]->start <= v[j + gap]->start) break; tmp = v[j]; v[j] = v[j + gap]; v[j + gap] = tmp; } } static int dmatch (const unsigned char *aa0, int n0, const unsigned char *aa1, int n1, int hoff, int window, int **pam2, int gdelval, int ggapval, struct f_struct *f_str) { int low, up; window = min (n1, window); /* hoff is the offset found from aa1 to seq 2 by hmatch */ low = -window/2-hoff; up = low+window; return FLOCAL_ALIGN(aa0-1,aa1-1,n0,n1, low, up, pam2, #ifdef OLD_FASTA_GAP -(gdelval-ggapval), #else -gdelval, #endif -ggapval,window,f_str); } /* A PACKAGE FOR LOCALLY ALIGNING TWO SEQUENCES WITHIN A BAND: To invoke, call LOCAL_ALIGN(A,B,M,N,L,U,W,G,H,MW). The parameters are explained as follows: A, B : two sequences to be aligned M : the length of sequence A N : the length of sequence B L : lower bound of the band U : upper bound of the band W : scoring table for matches and mismatches G : gap-opening penalty H : gap-extension penalty MW : maximum window size */ #include #define MININT -9999999 static int FLOCAL_ALIGN(const unsigned char *A, const unsigned char *B, int M, int N, int low, int up, int **W, int G,int H, int MW, struct f_struct *f_str) { int band; register struct bdstr *bssp; int i, j, si, ei; int c, d, e, m; int leftd, rightd; int best_score; int *wa, curd; int ib; bssp = f_str->bss; m = G+H; low = max(-M, low); up = min(N, up); if (N <= 0) return 0; if (M <= 0) return 0; band = up-low+1; if (band < 1) { fprintf(stderr,"low > up is unacceptable!: M: %d N: %d l/u: %d/%d\n", M, N, low, up); return 0; } if (low > 0) leftd = 1; else if (up < 0) leftd = band; else leftd = 1-low; rightd = band; si = max(0,-up); /* start index -1 */ ei = min(M,N-low); /* end index */ bssp[leftd].CC = 0; for (j = leftd+1; j <= rightd; j++) { bssp[j].CC = 0; bssp[j].DD = -G; } bssp[rightd+1].CC = MININT; bssp[rightd+1].DD = MININT; best_score = 0; bssp[leftd-1].CC = MININT; bssp[leftd].DD = -G; for (i = si+1; i <= ei; i++) { if (i > N-up) rightd--; if (leftd > 1) leftd--; wa = W[A[i]]; if ((c = bssp[leftd+1].CC-m) > (d = bssp[leftd+1].DD-H)) d = c; if ((ib = leftd+low-1+i ) > 0) c = bssp[leftd].CC+wa[B[ib]]; if (d > c) c = d; if (c < 0) c = 0; e = c-G; bssp[leftd].DD = d; bssp[leftd].CC = c; if (c > best_score) best_score = c; for (curd=leftd+1; curd <= rightd; curd++) { if ((c = c-m) > (e = e-H)) e = c; if ((c = bssp[curd+1].CC-m) > (d = bssp[curd+1].DD-H)) d = c; c = bssp[curd].CC + wa[B[curd+low-1+i]]; if (e > c) c = e; if (d > c) c = d; if (c < 0) c = 0; bssp[curd].CC = c; bssp[curd].DD = d; if (c > best_score) best_score = c; } } return best_score; } /* ckalloc - allocate space; check for success */ char *ckalloc(size_t amount) { char *p; if ((p = malloc( (unsigned) amount)) == NULL) w_abort("Ran out of memory.",""); return(p); } /* calculate the 100% identical score */ int shscore(const unsigned char *aa0, int n0, int **pam2) { int i, sum; for (i=0,sum=0; iss; waa = f_str->waa1; #ifdef OLD_FASTA_GAP q = -(ppst->gdelval - ppst->ggapval); #else q = -ppst->gdelval; #endif r = -ppst->ggapval; m = q + r; /* initialize 0th row */ for (ssj=ss; ssjH = 0; ssj->E = -q; } score = I = J = 0; aa1p = aa1; i = 0; while (*aa1p) { h = p = 0; f = -q; pwaa = waa + (*aa1p++ * n0); for (ssj = ss, aa0p = aa0; ssj < ss+n0; ssj++) { if ((h = h - m) > (f = f - r)) f = h; if ((h = ssj->H - m) > (e = ssj->E - r)) e = h; h = p + *pwaa++; if (h < 0 ) h = 0; if (h < f ) h = f; if (h < e ) h = e; p = ssj->H; ssj->H = h; ssj->E = e; if (h > score) { score = h; I = i; J = (int)(ssj-ss); } } i++; } /* done with forward pass */ if (score <= 0) return 0; /* to get the start point, go backwards */ cost = K = L = 0; for (ssj=ss+J; ssj>=ss; ssj--) ssj->H= ssj->E= -1; for (i=I; i>=0; i--) { h = f = -1; p = (i == I) ? 0 : -1; ssj = ss+J; /* bug in compiler */ for (aa0p = &aa0[J]; ssj>=ss; ssj--,aa0p--) { f = max (f,h-q)-r; ssj->E=max(ssj->E,ssj->H-q)-r; h = max(max(ssj->E,f),p+ppst->pam2[0][*aa0p][aa1[i]]); p = ssj->H; ssj->H=h; if (h > cost) { cost = h; K = i; L = (int)(ssj-ss); if (cost >= score) goto found; } } } found: /* printf(" %d: L: %3d-%3d/%3d; K: %3d-%3d/%3d\n",score,L,J,n0,K,I,n1); */ /* in the f_str version, the *res array is already allocated at 4*n0/3 */ a_res->max0 = J+1; a_res->min0 = L; a_res->max1 = I+1; a_res->min1 = K; /* the seq array arguments in this call have been reversed to allow assymetric scoring matrices - this affects the score decoding, and allocation of the score row matrix */ ALIGN(&aa0[L-1],&aa1[K-1],J-L+1,I-K+1,ppst->pam2[0],q,r,a_res->res,&a_res->nres,f_str); /* DISPLAY(&aa1[K-1],&aa0[L-1],I-K+1,J-L+1,res,L,K,ppst->sq); */ return score; } static int CHECK_SCORE(const unsigned char *A, const unsigned char *B, int M, int N, int *S, int **W, int G, int H, int *nres); #define gap(k) ((k) <= 0 ? 0 : g+h*(k)) /* k-symbol indel cost */ /* static int *sapp; */ /* Current script append ptr */ /* static int last; */ /* Last script op appended */ /* Append "Delete k" op */ #define DEL(k) \ { if (*last < 0) \ *last = (*sapp)[-1] -= (k); \ else { \ *last = (*sapp)[0] = -(k); \ (*sapp)++; \ } \ } /* Append "Insert k" op */ #define INS(k) \ { if (*last > 0) \ *last = (*sapp)[-1] += (k); \ else { \ *last = (*sapp)[0] = (k); \ (*sapp)++; \ } \ } #define REP { *last = (*sapp)[0] = 0; (*sapp)++;} /* Append "Replace" op */ /* align(A,B,M,N,tb,te) returns the cost of an optimum conversion between A[1..M] and B[1..N] that begins(ends) with a delete if tb(te) is zero and appends such a conversion to the current script. */ static int nw_align(const unsigned char *A, const unsigned char *B, int M, int N, int tb, int te, int **w, int g, int h, struct f_struct *f_str, int **sapp, int *last) { int midi, midj, type; /* Midpoint, type, and cost */ int midc; register int i, j; register int c, e, d, s; int m, t, *wa; struct swstr *f_ss, *r_ss; m = g + h; f_ss = f_str->f_ss; r_ss = f_str->r_ss; /* Boundary cases: M <= 1 or N == 0 */ if (N <= 0) { if (M > 0) {DEL(M)} return -gap(M); } if (M <= 1) { if (M <= 0) { INS(N); return -gap(N); } if (tb < te) tb = te; midc = (tb-h) - gap(N); midj = 0; wa = w[A[1]]; /* in the original version of this code, A[] is the second sequence */ for (j = 1; j <= N; j++) { c = -gap(j-1) + wa[B[j]] - gap(N-j); if (c > midc) { midc = c; midj = j; } } if (midj == 0) { DEL(1) INS(N) } else { if (midj > 1) { INS(midj-1) } REP if (midj < N) { INS(N-midj) } } return midc; } /* Divide: Find optimum midpoint (midi,midj) of cost midc */ midi = M/2; /* Forward phase: */ f_ss[0].H = 0; /* Compute H(M/2,k) & E(M/2,k) for all k */ t = -g; for (j = 1; j <= N; j++) { f_ss[j].H = t = t-h; f_ss[j].E = t-g; } t = tb; for (i = 1; i <= midi; i++) { s = f_ss[0].H; f_ss[0].H = c = t = t-h; e = t-g; wa = w[A[i]]; for (j = 1; j <= N; j++) { if ((c = c - m) > (e = e - h)) e = c; if ((c = f_ss[j].H - m) > (d = f_ss[j].E - h)) d = c; c = s + wa[B[j]]; if (e > c) c = e; if (d > c) c = d; s = f_ss[j].H; f_ss[j].H = c; f_ss[j].E = d; } } f_ss[0].E = f_ss[0].H; r_ss[N].H = 0; /* Reverse phase: */ t = -g; /* Compute R(M/2,k) & S(M/2,k) for all k */ for (j = N-1; j >= 0; j--) { r_ss[j].H = t = t-h; r_ss[j].E = t-g; } t = te; for (i = M-1; i >= midi; i--) { s = r_ss[N].H; r_ss[N].H = c = t = t-h; e = t-g; wa = w[A[i+1]]; for (j = N-1; j >= 0; j--) { if ((c = c - m) > (e = e - h)) e = c; if ((c = r_ss[j].H - m) > (d = r_ss[j].E - h)) d = c; c = s + wa[B[j+1]]; if (e > c) c = e; if (d > c) c = d; s = r_ss[j].H; r_ss[j].H = c; r_ss[j].E = d; } } r_ss[N].E = r_ss[N].H; midc = f_ss[0].H+r_ss[0].H; /* Find optimal midpoint */ midj = 0; type = 1; for (j = 0; j <= N; j++) if ((c = f_ss[j].H + r_ss[j].H) >= midc) if (c > midc || (f_ss[j].H != f_ss[j].E && r_ss[j].H == r_ss[j].E)) { midc = c; midj = j; } for (j = N; j >= 0; j--) if ((c = f_ss[j].E + r_ss[j].E + g) > midc) { midc = c; midj = j; type = 2; } /* Conquer: recursively around midpoint */ if (type == 1) { nw_align(A,B,midi,midj,tb,-g,w,g,h,f_str, sapp, last); nw_align(A+midi,B+midj,M-midi,N-midj,-g,te,w,g,h,f_str,sapp, last); } else { nw_align(A,B,midi-1,midj,tb,0,w,g,h,f_str, sapp, last); DEL(2); nw_align(A+midi+1,B+midj,M-midi-1,N-midj,0,te,w,g,h,f_str, sapp, last); } return midc; } /* Interface and top level of comparator */ static int ALIGN(const unsigned char *A, const unsigned char *B, int M, int N, int **W, int G, int H, int *S, int *nS, struct f_struct *f_str) { int c, ck; struct swstr *f_ss, *r_ss; int *sapp, last; sapp = S; last = 0; if ((f_ss = (struct swstr *) calloc (N+2, sizeof (struct swstr))) == NULL) { fprintf (stderr, "cannot allocate f_ss array %3d\n", N+2); exit (1); } f_ss++; f_str->f_ss = f_ss; if ((r_ss = (struct swstr *) calloc (N+2, sizeof (struct swstr))) == NULL) { fprintf (stderr, "cannot allocate r_ss array %3d\n", N+2); exit (1); } r_ss++; f_str->r_ss = r_ss; c = nw_align(A,B,M,N,-G,-G,W,G,H,f_str,&sapp, &last); /* OK, do it */ ck = CHECK_SCORE(A,B,M,N,S,W,G,H,nS); if (c != ck) fprintf(stderr,"Check_score error %d != %d\n",c,ck); f_ss--; r_ss--; free(r_ss); free(f_ss); return c; } /* Alignment display routine */ static char ALINE[51], BLINE[51], CLINE[51]; void DISPLAY(unsigned char *A, unsigned char *B, int M, int N, int *S, int AP, int BP, char *sq) { register char *a, *b, *c; register int i, j, op; int lines, ap, bp; i = j = op = lines = 0; ap = AP; bp = BP; a = ALINE; b = BLINE; c = CLINE; while (i < M || j < N) { if (op == 0 && *S == 0) { op = *S++; *a = sq[A[++i]]; *b = sq[B[++j]]; *c++ = (*a++ == *b++) ? '|' : ' '; } else { if (op == 0) op = *S++; if (op > 0) { *a++ = ' '; *b++ = sq[B[++j]]; op--; } else { *a++ = sq[A[++i]]; *b++ = ' '; op++; } *c++ = '-'; } if (a >= ALINE+50 || (i >= M && j >= N)) { *a = *b = *c = '\0'; printf("\n%5d ",50*lines++); for (b = ALINE+10; b <= a; b += 10) printf(" . :"); if (b <= a+5) printf(" ."); printf("\n%5d %s\n %s\n%5d %s\n",ap,ALINE,CLINE,bp,BLINE); ap = AP + i; bp = BP + j; a = ALINE; b = BLINE; c = CLINE; } } } /* CHECK_SCORE - return the score of the alignment stored in S */ static int CHECK_SCORE(const unsigned char *A, const unsigned char *B, int M, int N, int *S, int **w, int g, int h, int *nres) { register int i, j, op, nc; int score; score = i = j = op = nc = 0; while (i < M || j < N) { op = *S++; if (op == 0) { score = w[A[++i]][B[++j]] + score; nc++; /* fprintf(stderr,"=%4d %4d %4d %4d\n",i,j,w[A[i]][B[i]],score); */ } else if (op > 0) { score = score - (g+op*h); /* fprintf(stderr,">%4d %4d %4d %4d\n",i,j,-(g+op*h),score); */ j += op; nc += op; } else { score = score - (g-op*h); /* fprintf(stderr,"<%4d %4d %4d %4d\n",i,j,-(g-op*h),score); */ i -= op; nc -= op; } } *nres = nc; return score; } static int BCHECK_SCORE(const unsigned char *A, const unsigned char *B, int M, int N, int *S, int **w, int g, int h, int *nres) { register int i, j, op, nc; int *Ssave; int score; score = i = j = op = nc = 0; Ssave = S; while (i < M || j < N) { op = *S++; if (op == 0) { score = w[A[++i]][B[++j]] + score; nc++; /* fprintf(stderr,"op0 %4d %4d %4d %4d\n",i,j,w[A[i]][B[i]],score); */ } else if (op > 0) { score = score - (g+op*h); /* fprintf(stderr,"op> %4d %4d %4d %4d %4d\n",i,j,op,-(g+op*h),score); */ j += op; nc += op; } else { score = score - (g-op*h); /* fprintf(stderr,"op< %4d %4d %4d %4d %4d\n",i,j,op,-(g-op*h),score); */ i -= op; nc -= op; } } *nres = nc; return score; } /* A PACKAGE FOR LOCALLY ALIGNING TWO SEQUENCES WITHIN A BAND: To invoke, call LOCAL_ALIGN(A,B,M,N,L,U,W,G,H,S,dflag,&SI,&SJ,&EI,&EJ,MW). The parameters are explained as follows: A, B : two sequences to be aligned M : the length of sequence A N : the length of sequence B L : lower bound of the band U : upper bound of the band W : scoring table for matches and mismatches G : gap-opening penalty H : gap-extension penalty dflag : 0 - no display or backward pass *SI : starting position of sequence A in the optimal local alignment *SJ : starting position of sequence B in the optimal local alignment *EI : ending position of sequence A in the optimal local alignment *EJ : ending position of sequence B in the optimal local alignment MW : maximum window size */ int bd_walign (const unsigned char *aa0, int n0, const unsigned char *aa1, int n1, struct pstruct *ppst, struct f_struct *f_str, int hoff, struct a_res_str *a_res) { int low, up, score; int min0, min1, max0, max1; int window; window = min (n1, ppst->param_u.fa.optwid); /* hoff is the offset found from aa1 to seq 2 by hmatch */ low = -window/2-hoff; up = low+window; score=LOCAL_ALIGN(aa0-1,aa1-1,n0,n1, low, up, ppst->pam2[0], #ifdef OLD_FASTA_GAP -(ppst->gdelval-ppst->ggapval), #else -ppst->gdelval, #endif -ppst->ggapval, &min0,&min1,&max0,&max1,ppst->param_u.fa.optwid,f_str); if (score <=0) { fprintf(stderr,"n0/n1: %d/%d hoff: %d window: %d\n", n0, n1, hoff, window); return 0; } /* fprintf(stderr," ALIGN: start0: %d start1: %d stop0: %d stop1: %d, bot: %d top: %d, win: %d MX %d\n", min0-1,min1-1,max0-min0+1,max1-min1+1,low-(min1-min0),up-(min1-min0), ppst->param_u.fa.optwid,n0); */ a_res->min0 = min0-1; a_res->min1 = min1-1; a_res->max0 = max0; a_res->max1 = max1; B_ALIGN(aa0-1+min0-1,aa1-1+min1-1,max0-min0+1,max1-min1+1, low-(min1-min0),up-(min1-min0), ppst->pam2[0], #ifdef OLD_FASTA_GAP -(ppst->gdelval-ppst->ggapval), #else -ppst->gdelval, #endif -ppst->ggapval, a_res->res,&a_res->nres,ppst->param_u.fa.optwid,n0,f_str->bss); return score; } static int LOCAL_ALIGN(const unsigned char *A, const unsigned char *B, int M, int N, int low, int up, int **W, int G,int H, int *psi, int *psj, int *pei, int *pej, int MW, struct f_struct *f_str) { int band; register struct bdstr *bssp; int i, j, si, ei; int c, d, e, t, m; int leftd, rightd; int best_score, starti, startj, endi, endj; int *wa, curd; int ib; char flag; bssp = f_str->bss; m = G+H; low = max(-M, low); up = min(N, up); if (N <= 0) { *psi = *psj = *pei = *pej; return 0; } if (M <= 0) { *psi = *psj = *pei = *pej; return 0; } band = up-low+1; if (band < 1) { fprintf(stderr,"low > up is unacceptable!: M: %d N: %d l/u: %d/%d\n", M, N, low, up); return -1; } j = (MW + 2 + 2) * sizeof(struct bdstr); /* already done by init_work(); if (f_str->bss==NULL) f_str->bss = (struct bdstr *) ckalloc(j); */ if (low > 0) leftd = 1; else if (up < 0) leftd = band; else leftd = 1-low; rightd = band; si = max(0,-up); ei = min(M,N-low); bssp[leftd].CC = 0; for (j = leftd+1; j <= rightd; j++) { bssp[j].CC = 0; bssp[j].DD = -G; } bssp[rightd+1].CC = MININT; bssp[rightd+1].DD = MININT; best_score = 0; endi = si; endj = si+low; bssp[leftd-1].CC = MININT; bssp[leftd].DD = -G; for (i = si+1; i <= ei; i++) { if (i > N-up) rightd--; if (leftd > 1) leftd--; wa = W[A[i]]; if ((c = bssp[leftd+1].CC-m) > (d = bssp[leftd+1].DD-H)) d = c; if ((ib = leftd+low-1+i ) > 0) c = bssp[leftd].CC+wa[B[ib]]; /* if (ib > N) fprintf(stderr,"B[%d] out of range %d\n",ib,N); */ if (d > c) c = d; if (c < 0) c = 0; e = c-G; bssp[leftd].DD = d; bssp[leftd].CC = c; if (c > best_score) { best_score = c; endi = i; endj = ib; } for (curd=leftd+1; curd <= rightd; curd++) { if ((c = c-m) > (e = e-H)) e = c; if ((c = bssp[curd+1].CC-m) > (d = bssp[curd+1].DD-H)) d = c; /* if ((ib=curd+low-1+i) <= 0 || ib > N) fprintf(stderr,"B[%d]:%d\n",ib,B[ib]); */ c = bssp[curd].CC + wa[B[curd+low-1+i]]; if (e > c) c = e; if (d > c) c = d; if (c < 0) c = 0; bssp[curd].CC = c; bssp[curd].DD = d; if (c > best_score) { best_score = c; endi = i; endj = curd+low-1+i; } } } leftd = max(1,-endi-low+1); rightd = band-(up-(endj-endi)); bssp[rightd].CC = 0; t = -G; for (j = rightd-1; j >= leftd; j--) { bssp[j].CC = t = t-H; bssp[j].DD = t-G; } for (j = rightd+1; j <= band; ++j) bssp[j].CC = MININT; bssp[leftd-1].CC = bssp[leftd-1].DD = MININT; bssp[rightd].DD = -G; flag = 0; for (i = endi; i >= 1; i--) { if (i+low <= 0) leftd++; if (rightd < band) rightd++; wa = W[A[i]]; if ((c = bssp[rightd-1].CC-m) > (d = bssp[rightd-1].DD-H)) d = c; if ((ib = rightd+low-1+i) <= N) c = bssp[rightd].CC+wa[B[ib]]; /* if (ib <= 0) fprintf(stderr,"rB[%d] <1\n",ib); */ if (d > c) c = d; e = c-G; bssp[rightd].DD = d; bssp[rightd].CC = c; if (c == best_score) { starti = i; startj = ib; flag = 1; break; } for (curd=rightd-1; curd >= leftd; curd--) { if ((c = c-m) > (e = e-H)) e = c; if ((c = bssp[curd-1].CC-m) > (d = bssp[curd-1].DD-H)) d = c; /* if ((ib=curd+low-1+i) <= 0 || ib > N) fprintf(stderr,"i: %d, B[%d]:%d\n",i,ib,B[ib]); */ c = bssp[curd].CC + wa[B[curd+low-1+i]]; if (e > c) c = e; if (d > c) c = d; bssp[curd].CC = c; bssp[curd].DD = d; if (c == best_score) { starti = i; startj = curd+low-1+i; flag = 1; break; } } if (flag == 1) break; } if (starti < 0 || starti > M || startj < 0 || startj > N) { printf("starti=%d, startj=%d\n",starti,startj); *psi = *psj = *pei = *pej; exit(1); } *psi = starti; *psj = startj; *pei = endi; *pej = endj; return best_score; } /* A PACKAGE FOR GLOBALLY ALIGNING TWO SEQUENCES WITHIN A BAND: To invoke, call B_ALIGN(A,B,M,N,L,U,W,G,H,S,MW,MX). The parameters are explained as follows: A, B : two sequences to be aligned M : the length of sequence A N : the length of sequence B L : lower bound of the band U : upper bound of the band W : scoring table for matches and mismatches G : gap-opening penalty H : gap-extension penalty S : script for DISPLAY routine MW : maximum window size MX : maximum length sequence M to be aligned */ static int IP; static int *MP[3]; /* save crossing points */ static int *FP; /* forward dividing points */ static char *MT[3]; /* 0: rep, 1: del, 2: ins */ static char *FT; /* bg_align(A,B,M,N,up,low,tb,te) returns the cost of an optimum conversion between A[1..M] and B[1..N] and appends such a conversion to the current script. tb(te)= 1 no gap-open penalty if the conversion begins(ends) with a delete. tb(te)= 2 no gap-open penalty if the conversion begins(ends) with an insert. */ static int bg_align(const unsigned char *A, const unsigned char *B, int M, int N, int low, int up, int tb, int te, int **w, int g, int h, struct bdstr *bss, int **sapp, int *last) { int rmid, k, l, r, v, kt; int t1, t2, t3; { int band, midd; int leftd, rightd; /* for CC, DD, CP and DP */ register int curd; /* current index for CC, DD CP and DP */ register int i, j; register int c, d, e; int t, fr, *wa, ib, m; /* Boundary cases: M <= 0 , N <= 0, or up-low <= 0 */ if (N <= 0) { if (M > 0) { DEL(M) } return 0; } if (M <= 0) { INS(N) return 0; } if ((band = up-low+1) <= 1) { for (i = 1; i <= M; i++) { REP } return 0; } /* Divide: Find all crossing points */ /* Initialization */ m = g + h; midd = band/2 + 1; rmid = low + midd - 1; leftd = 1-low; rightd = up-low+1; if (leftd < midd) { fr = -1; for (j = 0; j < midd; j++) bss[j].CP = bss[j].DP = -1; for (j = midd; j <= rightd; j++) { bss[j].CP = bss[j].DP = 0; } MP[0][0] = -1; MP[1][0] = -1; MP[2][0] = -1; MT[0][0] = MT[1][0] = MT[2][0] = 0; } else if (leftd > midd) { fr = leftd-midd; for (j = 0; j <= midd; j++) { bss[j].CP = bss[j].DP = fr; } for (j = midd+1; j <= rightd; j++) bss[j].CP = bss[j].DP = -1; MP[0][fr] = -1; MP[1][fr] = -1; MP[2][fr] = -1; MT[0][fr] = MT[1][fr] = MT[2][fr] = 0; } else { fr = 0; for (j = 0; j < midd; j++) { bss[j].CP = bss[j].DP = 0; } for (j = midd; j <= rightd; j++) { bss[j].CP = bss[j].DP = 0; } MP[0][0] = -1; MP[1][0] = -1; MP[2][0] = -1; MT[0][0] = MT[1][0] = MT[2][0] = 0; } bss[leftd].CC = 0; if (tb == 2) t = 0; else t = -g; for (j = leftd+1; j <= rightd; j++) { bss[j].CC = t = t-h; bss[j].DD = t-g; } bss[rightd+1].CC = MININT; bss[rightd+1].DD = MININT; if (tb == 1) bss[leftd].DD = 0; else bss[leftd].DD = -g; bss[leftd-1].CC = MININT; for (i = 1; i <= M; i++) { if (i > N-up) rightd--; if (leftd > 1) leftd--; wa = w[A[i]]; if ((c = bss[leftd+1].CC-m) > (d = bss[leftd+1].DD-h)) { d = c; bss[leftd].DP = bss[leftd+1].CP; } else bss[leftd].DP = bss[leftd+1].DP; if ((ib = leftd+low-1+i) > 0) c = bss[leftd].CC+wa[B[ib]]; if (d > c || ib <= 0) { c = d; bss[leftd].CP = bss[leftd].DP; } e = c-g; bss[leftd].DD = d; bss[leftd].CC = c; IP = bss[leftd].CP; if (leftd == midd) bss[leftd].CP = bss[leftd].DP = IP = i; for (curd=leftd+1; curd <= rightd; curd++) { if (curd != midd) { if ((c = c-m) > (e = e-h)) { e = c; IP = bss[curd-1].CP; } /* otherwise, IP is unchanged */ if ((c = bss[curd+1].CC-m) > (d = bss[curd+1].DD-h)) { d = c; bss[curd].DP = bss[curd+1].CP; } else { bss[curd].DP = bss[curd+1].DP; } c = bss[curd].CC + wa[B[curd+low-1+i]]; if (c < d || c < e) { if (e > d) { c = e; bss[curd].CP = IP; } else { c = d; bss[curd].CP = bss[curd].DP; } } /* otherwise, CP is unchanged */ bss[curd].CC = c; bss[curd].DD = d; } else { if ((c = c-m) > (e = e-h)) { e = c; MP[1][i] = bss[curd-1].CP; MT[1][i] = 2; } else { MP[1][i] = IP; MT[1][i] = 2; } if ((c = bss[curd+1].CC-m) > (d = bss[curd+1].DD-h)) { d = c; MP[2][i] = bss[curd+1].CP; MT[2][i] = 1; } else { MP[2][i] = bss[curd+1].DP; MT[2][i] = 1; } c = bss[curd].CC + wa[B[curd+low-1+i]]; if (c < d || c < e) { if (e > d) { c = e; MP[0][i] = MP[1][i]; MT[0][i] = 2; } else { c = d; MP[0][i] = MP[2][i]; MT[0][i] = 1; } } else { MP[0][i] = i-1; MT[0][i] = 0; } if (c-g > e) { MP[1][i] = MP[0][i]; MT[1][i] = MT[0][i]; } if (c-g > d) { MP[2][i] = MP[0][i]; MT[2][i] = MT[0][i]; } bss[curd].CP = bss[curd].DP = IP = i; bss[curd].CC = c; bss[curd].DD = d; } } } /* decide which path to be traced back */ if (te == 1 && d+g > c) { k = bss[rightd].DP; l = 2; } else if (te == 2 && e+g > c) { k = IP; l = 1; } else { k = bss[rightd].CP; l = 0; } if (rmid > N-M) l = 2; else if (rmid < N-M) l = 1; v = c; } /* Conquer: Solve subproblems recursively */ /* trace back */ r = -1; for (; k > -1; r=k, k=MP[l][r], l=MT[l][r]){ FP[k] = r; FT[k] = l; /* l=0,1,2 */ } /* forward dividing */ if (r == -1) { /* optimal alignment did not cross the middle diagonal */ if (rmid < 0) { bg_align(A,B,M,N,rmid+1,up,tb,te,w,g,h,bss, sapp, last); } else { bg_align(A,B,M,N,low,rmid-1,tb,te,w,g,h,bss, sapp, last); } } else { k = r; l = FP[k]; kt = FT[k]; /* first block */ if (rmid < 0) { bg_align(A,B,r-1,r+rmid,rmid+1,min(up,r+rmid),tb,1,w,g,h,bss,sapp,last); DEL(1) } else if (rmid > 0) { bg_align(A,B,r,r+rmid-1,max(-r,low),rmid-1,tb,2,w,g,h,bss,sapp,last); INS(1) } /* intermediate blocks */ t2 = up-rmid-1; t3 = low-rmid+1; for (; l > -1; k = l, l = FP[k], kt = FT[k]) { if (kt == 0) { REP } else if (kt == 1) { /* right-hand side triangle */ INS(1) t1 = l-k-1; bg_align(A+k,B+k+rmid+1,t1,t1,0,min(t1,t2),2,1,w,g,h,bss,sapp,last); DEL(1) } else { /* kt == 2, left-hand side triangle */ DEL(1) t1 = l-k-1; bg_align(A+k+1,B+k+rmid,t1,t1,max(-t1,t3),0,1,2,w,g,h,bss,sapp,last); INS(1) } } /* last block */ if (N-M > rmid) { INS(1) t1 = k+rmid+1; bg_align(A+k,B+t1,M-k,N-t1,0,min(N-t1,t2),2,te,w,g,h,bss,sapp,last); } else if (N-M < rmid) { DEL(1) t1 = M-(k+1); bg_align(A+k+1,B+k+rmid,t1,N-(k+rmid),max(-t1,t3),0,1,te,w,g,h, bss,sapp,last); } } return(v); } int B_ALIGN(const unsigned char *A, const unsigned char *B, int M, int N, int low, int up, int **W, int G, int H, int *S, int *nS, int MW, int MX, struct bdstr *bss) { int c, i, j; int g, h; size_t mj; int check_score; int **sapp, *sapp_v, *last, last_v; g = G; h = H; sapp_v = S; sapp = &sapp_v; last_v = 0; last = &last_v; low = min(max(-M, low),min(N-M,0)); up = max(min(N, up),max(N-M,0)); if (N <= 0) { if (M > 0) { DEL(M); } return -gap(M); } if (M <= 0) { INS(N); return -gap(N); } if (up-low+1 <= 1) { c = 0; for (i = 1; i <= M; i++) { REP; c += W[A[i]][B[i]]; } return c; } if (MT[0]==NULL) { mj = MX+1; MT[0] = (char *) ckalloc(mj); MT[1] = (char *) ckalloc(mj); MT[2] = (char *) ckalloc(mj); FT = (char *) ckalloc(mj); mj *= sizeof(int); MP[0] = (int *) ckalloc(mj); MP[1] = (int *) ckalloc(mj); MP[2] = (int *) ckalloc(mj); FP = (int *) ckalloc(mj); } c = bg_align(A,B,M,N,low,up,0,0,W,G,H,bss, sapp, last); check_score = BCHECK_SCORE(A,B,M,N,S,W,G,H,nS); free(FP); free(MP[2]); free(MP[1]); free(MP[0]); free(FT); free(MT[2]); free(MT[1]); free(MT[0]); MT[0]=NULL; if (check_score != c) printf("\nBCheck_score=%d != %d\n", check_score,c); return c; } int do_walign (const unsigned char *aa0, int n0, const unsigned char *aa1, int n1, int frame, struct pstruct *ppst, struct f_struct *f_str, struct a_res_str *a_res, int *have_ares) { int hoff, optflag_s, optcut_s, optwid_s, n10, score; const unsigned char *aa1p; struct rstruct rst; #ifdef TFASTA f_str->n10 = n10=aatran(aa1,f_str->aa1x,n1,frame); do_fasta (aa0, n0, f_str->aa1x, n10, ppst, f_str, &rst, &hoff); aa1p = f_str->aa1x; #else n10 = n1; aa1p = aa1; #endif a_res->res = f_str->res; *have_ares = 1; if (ppst->sw_flag) return sw_walign(aa0, n0, aa1p, n10, ppst, f_str, a_res); else { optflag_s = ppst->param_u.fa.optflag; optcut_s = ppst->param_u.fa.optcut; optwid_s = ppst->param_u.fa.optwid; ppst->param_u.fa.optflag = 1; ppst->param_u.fa.optcut = 0; ppst->param_u.fa.optwid *= 2; do_fasta(aa0, n0, aa1p, n10, ppst, f_str, &rst, &hoff); if (rst.score[0]>0) { score=bd_walign(aa0, n0, aa1p, n10, ppst, f_str, hoff, a_res); } else { a_res->nres = 0; score=0; } ppst->param_u.fa.optflag = optflag_s; ppst->param_u.fa.optcut = optcut_s; ppst->param_u.fa.optwid = optwid_s; return score; } } void pre_cons(const unsigned char *aa1, int n1, int frame, struct f_struct *f_str) { #ifdef TFASTA f_str->n10 = aatran(aa1,f_str->aa1x,n1,frame); #endif } /* aln_func_vals - set up aln.qlfact, qlrev, llfact, llmult, frame, llrev */ /* call from calcons, calc_id, calc_code */ void aln_func_vals(int frame, struct a_struct *aln) { #ifdef TFASTA aln->qlfact = 1; aln->llfact = 3; aln->llmult = 3; aln->qlrev = 0; aln->frame = frame; if (frame > 2) { aln->llrev = 1; aln->frame = 3 - frame; } else aln->llrev = 0; #else /* FASTA */ aln->llfact = aln->qlfact = aln->llmult = 1; aln->llrev = 0; if (frame > 0) aln->qlrev = 1; else aln->qlrev = 0; aln->frame = 0; #endif } #include "a_mark.h" int calcons(const unsigned char *aa0, int n0, const unsigned char *aa1, int n1, int *nc, struct a_struct *aln, struct a_res_str a_res, struct pstruct pst, char *seqc0, char *seqc1, char *seqca, struct f_struct *f_str) { int i0, i1, nn1; int op, lenc, nd, ns, itmp; const unsigned char *aa1p; char *sp0, *sp1, *spa, *sq; int *rp; int smins, mins; if (pst.ext_sq_set) { sq = pst.sqx; } else { sq = pst.sq; } #ifndef TFASTA aa1p = aa1; nn1 = n1; #else aa1p = f_str->aa1x; nn1 = f_str->n10; #endif aln->amin0 = a_res.min0; aln->amax0 = a_res.max0; aln->amin1 = a_res.min1; aln->amax1 = a_res.max1; /* will we show all the start ?*/ if (min(a_res.min0,a_res.min1) < aln->llen || aln->showall==1) if (a_res.min0 >= a_res.min1) { /* aa0 extends more to left */ smins=0; if (aln->showall==1) mins = a_res.min0; else mins = min(a_res.min0,aln->llcntx); aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst); aln->smin0 = a_res.min0-mins; if ((mins-a_res.min1)>0) { memset(seqc1,' ',mins-a_res.min1); aancpy(seqc1+mins-a_res.min1,(char *)aa1p,a_res.min1,pst); aln->smin1 = 0; } else { aancpy(seqc1,(char *)aa1p+a_res.min1-mins,mins,pst); aln->smin1 = a_res.min1-mins; } } else { smins=0; if (aln->showall == 1) mins=a_res.min1; else mins = min(a_res.min1,aln->llcntx); aancpy(seqc1,(char *)(aa1p+a_res.min1-mins),mins,pst); aln->smin1 = a_res.min1-mins; if ((mins-a_res.min0)>0) { memset(seqc0,' ',mins-a_res.min0); aancpy(seqc0+mins-a_res.min0,(char *)aa0,a_res.min0,pst); aln->smin0 = 0; } else { aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst); aln->smin0 = a_res.min0-mins; } } else { mins= min(aln->llcntx,min(a_res.min0,a_res.min1)); smins=mins; aln->smin0=a_res.min0 - mins; aln->smin1=a_res.min1 - mins; aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst); aancpy(seqc1,(char *)aa1p+a_res.min1-mins,mins,pst); } /* set the alignment code to zero for context */ memset(seqca,0,mins); /* TFASTA smins = mins = 0; aln->smin0=a_res.min0; aln->smin1=a_res.min1; */ /* now get the middle */ spa = seqca+mins; sp0 = seqc0+mins; sp1 = seqc1+mins; rp = a_res.res; lenc = aln->nident = aln->nsim = aln->ngap_q = aln->ngap_l = aln->nfs = op = 0; i0 = a_res.min0; i1 = a_res.min1; while (i0 < a_res.max0 || i1 < a_res.max1) { if (op == 0 && *rp == 0) { op = *rp++; lenc++; if ((itmp=pst.pam2[0][aa0[i0]][aa1p[i1]])<0) { *spa = M_NEG; } else if (itmp == 0) { *spa = M_ZERO;} else {*spa = M_POS;} if (*spa == M_POS || *spa==M_ZERO) aln->nsim++; *sp0 = sq[aa0[i0++]]; *sp1 = sq[aa1p[i1++]]; if (toupper(*sp0) == toupper(*sp1)) { aln->nident++; *spa = M_IDENT; } else if (pst.nt_align) { if ((toupper(*sp0) == 'T' && toupper(*sp1) == 'U') || (toupper(*sp0)=='U' && toupper(*sp1)=='T')) { aln->nident++; *spa = M_IDENT; } else if (toupper(*sp0) == 'N') aln->ngap_q++; else if (toupper(*sp1) == 'N') aln->ngap_l++; } sp0++; sp1++; spa++; } else { if (op==0) op = *rp++; if (op > 0) { *sp0++ = '-'; *sp1++ = sq[aa1p[i1++]]; *spa++ = M_DEL; op--; lenc++; aln->ngap_q++; } else { *sp0++ = sq[aa0[i0++]]; *sp1++ = '-'; *spa++ = M_DEL; op++; lenc++; aln->ngap_l++; } } } *nc = lenc; *spa = '\0'; /* now we have the middle, get the right end */ if (!aln->llcntx_flg) { ns = mins + lenc + aln->llen; /* show an extra line? */ ns -= (itmp = ns %aln->llen); /* itmp = left over on last line */ if (itmp>aln->llen/2) ns += aln->llen; /* more than 1/2 , use another*/ nd = ns - (mins+lenc); /* this much extra */ } else nd = aln->llcntx; if (nd > max(n0-a_res.max0,nn1-a_res.max1)) nd = max(n0-a_res.max0,nn1-a_res.max1); if (aln->showall==1) { nd = max(n0-a_res.max0,nn1-a_res.max1); /* reset for showall=1 */ /* get right end */ aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,n0-a_res.max0,pst); aancpy(seqc1+mins+lenc,(char *)aa1p+a_res.max1,nn1-a_res.max1,pst); /* fill with blanks - this is required to use one 'nc' */ memset(seqc0+mins+lenc+n0-a_res.max0,' ',nd-(n0-a_res.max0)); memset(seqc1+mins+lenc+nn1-a_res.max1,' ',nd-(nn1-a_res.max1)); } else { if ((nd-(n0-a_res.max0))>0) { aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,(n0-a_res.max0),pst); memset(seqc0+mins+lenc+n0-a_res.max0,' ',nd-(n0-a_res.max0)); } else aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,nd,pst); if ((nd-(nn1-a_res.max1))>0) { aancpy(seqc1+mins+lenc,(char *)aa1p+a_res.max1,nn1-a_res.max1,pst); memset(seqc1+mins+lenc+nn1-a_res.max1,' ',nd-(nn1-a_res.max1)); } else aancpy(seqc1+mins+lenc,(char *)aa1p+a_res.max1,nd,pst); } /* fprintf(stderr,"%d\n",mins+lenc+nd); */ return mins+lenc+nd; } int calcons_a(const unsigned char *aa0, unsigned char *aa0a, int n0, const unsigned char *aa1, int n1, int *nc, struct a_struct *aln, struct a_res_str a_res, struct pstruct pst, char *seqc0, char *seqc0a, char *seqc1, char *seqca, char *ann_arr, struct f_struct *f_str) { int i0, i1, nn1; int op, lenc, nd, ns, itmp; const unsigned char *aa1p; char *sp0, *sp0a, *sp1, *spa, *sq; int *rp; int smins, mins; if (pst.ext_sq_set) { sq = pst.sqx; } else { sq = pst.sq; } #ifndef TFASTA aa1p = aa1; nn1 = n1; #else aa1p = f_str->aa1x; nn1 = f_str->n10; #endif aln->amin0 = a_res.min0; aln->amax0 = a_res.max0; aln->amin1 = a_res.min1; aln->amax1 = a_res.max1; /* will we show all the start ?*/ /* will we show all the start ?*/ if (min(a_res.min0,a_res.min1)llen || aln->showall==1) if (a_res.min0>=a_res.min1) { /* aa0 extends more to left */ smins=0; if (aln->showall==1) mins = a_res.min0; else mins = min(a_res.min0,aln->llcntx); aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst); aln->smin0 = a_res.min0-mins; if ((mins-a_res.min1)>0) { memset(seqc1,' ',mins-a_res.min1); aancpy(seqc1+mins-a_res.min1,(char *)aa1p,a_res.min1,pst); aln->smin1 = 0; } else { aancpy(seqc1,(char *)aa1p+a_res.min1-mins,mins,pst); aln->smin1 = a_res.min1-mins; } } else { smins=0; if (aln->showall == 1) mins=a_res.min1; else mins = min(a_res.min1,aln->llcntx); aancpy(seqc1,(char *)(aa1p+a_res.min1-mins),mins,pst); aln->smin1 = a_res.min1-mins; if ((mins-a_res.min0)>0) { memset(seqc0,' ',mins-a_res.min0); aancpy(seqc0+mins-a_res.min0,(char *)aa0,a_res.min0,pst); aln->smin0 = 0; } else { aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst); aln->smin0 = a_res.min0-mins; } } else { mins= min(aln->llcntx,min(a_res.min0,a_res.min1)); smins=mins; aln->smin0=a_res.min0 - smins; aln->smin1=a_res.min1 - smins; aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst); aancpy(seqc1,(char *)aa1p+a_res.min1-mins,mins,pst); } /* set the alignment code to zero for context */ memset(seqca,0,mins); memset(seqc0a,' ',mins); /* TFASTA smins = mins = 0; aln->smin0=a_res.min0; aln->smin1=a_res.min1; */ /* now get the middle */ spa = seqca+mins; sp0 = seqc0+mins; sp0a = seqc0a+mins; sp1 = seqc1+mins; rp = a_res.res; lenc = aln->nident = aln->nsim = aln->ngap_q = aln->ngap_l = aln->nfs = op = 0; i0 = a_res.min0; i1 = a_res.min1; while (i0 < a_res.max0 || i1 < a_res.max1) { if (op == 0 && *rp == 0) { op = *rp++; lenc++; if ((itmp=pst.pam2[0][aa0[i0]][aa1p[i1]])<0) { *spa = M_NEG; } else if (itmp == 0) { *spa = M_ZERO;} else {*spa = M_POS;} if (*spa == M_POS || *spa==M_ZERO) aln->nsim++; *sp0a++ = ann_arr[aa0a[i0]]; *sp0 = sq[aa0[i0++]]; *sp1 = sq[aa1p[i1++]]; if (toupper(*sp0) == toupper(*sp1)) { aln->nident++; *spa = M_IDENT; } else if (pst.nt_align) { if ((toupper(*sp0) == 'T' && toupper(*sp1) == 'U') || (toupper(*sp0)=='U' && toupper(*sp1)=='T')) { aln->nident++; *spa = M_IDENT; } else if (toupper(*sp0) == 'N') aln->ngap_q++; else if (toupper(*sp1) == 'N') aln->ngap_l++; } sp0++; sp1++; spa++; } else { if (op==0) op = *rp++; if (op>0) { *sp0++ = '-'; *sp1++ = sq[aa1p[i1++]]; *spa++ = M_DEL; *sp0a++ = ' '; op--; lenc++; aln->ngap_q++; } else { *sp0a++ = ann_arr[aa0a[i0]]; *sp0++ = sq[aa0[i0++]]; *sp1++ = '-'; *spa++ = M_DEL; op++; lenc++; aln->ngap_l++; } } } *nc = lenc; *sp0a = *spa = '\0'; /* now we have the middle, get the right end */ if (!aln->llcntx_flg) { ns = mins + lenc + aln->llen; /* show an extra line? */ ns -= (itmp = ns %aln->llen); /* itmp = left over on last line */ if (itmp>aln->llen/2) ns += aln->llen; /* more than 1/2 , use another*/ nd = ns - (mins+lenc); /* this much extra */ } else nd = aln->llcntx; if (nd > max(n0-a_res.max0,nn1-a_res.max1)) nd = max(n0-a_res.max0,nn1-a_res.max1); if (aln->showall==1) { nd = max(n0-a_res.max0,nn1-a_res.max1); /* reset for showall=1 */ /* get right end */ aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,n0-a_res.max0,pst); aancpy(seqc1+mins+lenc,(char *)aa1p+a_res.max1,nn1-a_res.max1,pst); /* fill with blanks - this is required to use one 'nc' */ memset(seqc0+mins+lenc+n0-a_res.max0,' ',nd-(n0-a_res.max0)); memset(seqc1+mins+lenc+nn1-a_res.max1,' ',nd-(nn1-a_res.max1)); } else { if ((nd-(n0-a_res.max0))>0) { aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,(n0-a_res.max0),pst); memset(seqc0+mins+lenc+n0-a_res.max0,' ',nd-(n0-a_res.max0)); } else aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,nd,pst); if ((nd-(nn1-a_res.max1))>0) { aancpy(seqc1+mins+lenc,(char *)aa1p+a_res.max1,nn1-a_res.max1,pst); memset(seqc1+mins+lenc+nn1-a_res.max1,' ',nd-(nn1-a_res.max1)); } else aancpy(seqc1+mins+lenc,(char *)aa1p+a_res.max1,nd,pst); } /* fprintf(stderr,"%d\n",mins+lenc+nd); */ return mins+lenc+nd; } static void update_code(char *al_str, int al_str_max, int op, int op_cnt) { char op_char[5]={"=-+*"}; char tmp_cnt[20]; sprintf(tmp_cnt,"%c%d",op_char[op],op_cnt); strncat(al_str,tmp_cnt,al_str_max); } /* build an array of match/ins/del - length strings */ int calc_code(const unsigned char *aa0, int n0, const unsigned char *aa1, int n1, struct a_struct *aln, struct a_res_str a_res, struct pstruct pst, char *al_str, int al_str_n, struct f_struct *f_str) { int i0, i1, nn1; int op, lenc; int p_op, op_cnt; const unsigned char *aa1p; char sp0, sp1, *sq; int *rp; if (pst.ext_sq_set) { sq = pst.sqx; } else { sq = pst.sq; } #ifndef TFASTA aa1p = aa1; nn1 = n1; #else aa1p = f_str->aa1x; nn1 = f_str->n10; #endif aln->amin0 = a_res.min0; aln->amax0 = a_res.max0; aln->amin1 = a_res.min1; aln->amax1 = a_res.max1; rp = a_res.res; lenc = aln->nident = aln->nsim = aln->ngap_q = aln->ngap_l = aln->nfs = op = p_op = 0; op_cnt = 0; i0 = a_res.min0; i1 = a_res.min1; while (i0 < a_res.max0 || i1 < a_res.max1) { if (op == 0 && *rp == 0) { if (pst.pam2[0][aa0[i0]][aa1p[i1]]>=0) { aln->nsim++;} sp0 = sq[aa0[i0++]]; sp1 = sq[aa1p[i1++]]; if (p_op == 0 || p_op==3) { if (sp0 != '*' && sp1 != '*') { if (p_op == 3) { update_code(al_str,al_str_n-strlen(al_str),p_op,op_cnt); op_cnt = 1; p_op = 0; } else {op_cnt++;} } else { update_code(al_str,al_str_n-strlen(al_str),p_op,op_cnt); op_cnt = 1; p_op = 3; } } else { update_code(al_str,al_str_n-strlen(al_str),p_op,op_cnt); op_cnt = 1; p_op = 0; } op = *rp++; lenc++; if (toupper(sp0) == toupper(sp1)) aln->nident++; else if (pst.nt_align) { if ((toupper(sp0) == 'T' && toupper(sp1) == 'U') || (toupper(sp0)=='U' && toupper(sp1)=='T')) aln->nident++; else if (toupper(sp0) == 'N') aln->ngap_q++; else if (toupper(sp1) == 'N') aln->ngap_l++; } } else { if (op==0) op = *rp++; if (op>0) { if (p_op == 1) { op_cnt++;} else { update_code(al_str,al_str_n - strlen(al_str),p_op,op_cnt); op_cnt = 1; p_op = 1; } op--; lenc++; i1++; aln->ngap_q++; } else { if (p_op == 2) { op_cnt++;} else { update_code(al_str,al_str_n - strlen(al_str),p_op,op_cnt); op_cnt = 1; p_op = 2; } op++; lenc++; i0++; aln->ngap_l++; } } } update_code(al_str,al_str_n - strlen(al_str),p_op,op_cnt); return lenc; } int calc_id(const unsigned char *aa0, int n0, const unsigned char *aa1, int n1, struct a_struct *aln, struct a_res_str a_res, struct pstruct pst, struct f_struct *f_str) { int i0, i1, nn1; int op, lenc; int sp0, sp1; const unsigned char *aa1p; int *rp; char *sq; if (pst.ext_sq_set) { sq = pst.sqx; } else { sq = pst.sq; } #ifndef TFASTA aa1p = aa1; nn1 = n1; #else aa1p = f_str->aa1x; nn1 = f_str->n10; #endif aln->amin0 = a_res.min0; aln->amax0 = a_res.max0; aln->amin1 = a_res.min1; aln->amax1 = a_res.max1; rp = a_res.res; lenc = aln->nident = aln->nsim = aln->ngap_q = aln->ngap_l = aln->nfs = op = 0; i0 = a_res.min0; i1 = a_res.min1; while (i0 < a_res.max0 || i1 < a_res.max1) { if (op == 0 && *rp == 0) { op = *rp++; lenc++; if (pst.pam2[0][aa0[i0]][aa1p[i1]]>=0) { aln->nsim++;} sp0 = sq[aa0[i0++]]; sp1 = sq[aa1p[i1++]]; if (toupper(sp0) == toupper(sp1)) {aln->nident++;} else if (pst.nt_align) { if ((toupper(sp0)=='T' && toupper(sp1)== 'U')|| (toupper(sp0)=='U' && toupper(sp1)=='T')) {aln->nident++;} else if (toupper(sp0) == 'N') aln->ngap_q++; else if (toupper(sp1) == 'N') aln->ngap_l++; } } else { if (op==0) op = *rp++; if (op>0) {op--; lenc++; i1++; aln->ngap_q++;} else {op++; lenc++; i0++; aln->ngap_l++; } } } return lenc; } #ifdef PCOMPLIB #include "w_mw.h" void update_params(struct qmng_str *qm_msg, struct pstruct *ppst) { ppst->n0 = qm_msg->n0; } #endif