/* copyright (c) 1994, 1995, 1996 William R. Pearson */ /* $Name: fa_34_26_5 $ - $Id: dropnsw.c,v 1.35 2006/10/19 14:49:14 wrp Exp $ */ /* this is a slower version of dropgsw.c that implements the Smith-Waterman algorithm. It lacks the shortcuts in dropgsw.c that prevent scores less than the penalty for the first residue in a gap from being generated. Thus, dropnsw.c should be used for tests with very large gap penalties, and is more appropriate for programs like prss3, which are interested in accurate low scores. */ /* the do_walign() code in this file is not thread_safe */ /* init_work(), do_work(), are thread safe */ #include #include #include #include #include #include "defs.h" #include "param.h" static char *verstr="3.5 Sept 2006"; struct swstr { int H, E;}; struct f_struct { struct swstr *ss; struct swstr *f_ss; struct swstr *r_ss; int *waa_s, *waa_a; int **pam2p[2]; int *res; double aa0_f[MAXSQ]; double *kar_p; }; #define DROP_INTERN #include "drop_func.h" extern int do_karlin(const unsigned char *aa1, int n1, int **pam2, struct pstruct *ppst, double *aa0_f, double *kar_p, double *lambda, double *H); extern void aancpy(char *to, char *from, int count, struct pstruct pst); int ALIGN(const unsigned char *A, const unsigned char *B, int M, int N, int **W, int IW, int G, int H, int *S, int *NC, struct f_struct *f_str); /* initialize for Smith-Waterman optimal score */ void init_work (unsigned char *aa0, int n0, struct pstruct *ppst, struct f_struct **f_arg) { int maxn0; int *pwaa_s, *pwaa_a; int e, f, i, j, q; int *res; struct f_struct *f_str; int **pam2p; struct swstr *ss, *f_ss, *r_ss; int nsq, ip; if (ppst->ext_sq_set) { nsq = ppst->nsqx; ip = 1; } else { nsq = ppst->nsq; ip = 0; } f_str = (struct f_struct *)calloc(1,sizeof(struct f_struct)); /* allocate space for the scoring arrays */ maxn0 = n0 + 2; if ((ss = (struct swstr *) calloc (maxn0, sizeof (struct swstr))) == NULL) { fprintf (stderr, "cannot allocate ss array %3d\n", n0); exit (1); } ss++; f_str->ss = ss; if ((f_ss = (struct swstr *) calloc (maxn0, sizeof (struct swstr))) == NULL) { fprintf (stderr, "cannot allocate f_ss array %3d\n", n0); exit (1); } f_ss++; f_str->f_ss = f_ss; if ((r_ss = (struct swstr *) calloc (n0+2, sizeof (struct swstr))) == NULL) { fprintf (stderr, "cannot allocate r_ss array %3d\n", n0); exit (1); } r_ss++; f_str->r_ss = r_ss; /* initialize variable (-S) pam matrix */ if ((f_str->waa_s= (int *)calloc((nsq+1)*(n0+1),sizeof(int))) == NULL) { fprintf(stderr,"cannot allocate waa_s array %3d\n",nsq*n0); exit(1); } if ((f_str->pam2p[1]= (int **)calloc((n0+1),sizeof(int *))) == NULL) { fprintf(stderr,"cannot allocate pam2p[1] array %3d\n",n0); exit(1); } pam2p = f_str->pam2p[1]; if ((pam2p[0]=(int *)calloc((nsq+1)*(n0+1),sizeof(int))) == NULL) { fprintf(stderr,"cannot allocate pam2p[1][] array %3d\n",nsq*n0); exit(1); } for (i=1; iwaa_a= (int *)calloc((nsq+1)*(n0+1),sizeof(int))) == NULL) { fprintf(stderr,"cannot allocate waa_a struct %3d\n",nsq*n0); exit(1); } if ((f_str->pam2p[0]= (int **)calloc((n0+1),sizeof(int *))) == NULL) { fprintf(stderr,"cannot allocate pam2p[1] array %3d\n",n0); exit(1); } pam2p = f_str->pam2p[0]; if ((pam2p[0]=(int *)calloc((nsq+1)*(n0+1),sizeof(int))) == NULL) { fprintf(stderr,"cannot allocate pam2p[1][] array %3d\n",nsq*n0); exit(1); } for (i=1; iwaa_s + (*aa1p++)*n0; sets up pwaa so that *pwaa++ rapidly moves though the scores of the aa1p[] position without further indexing For a real sequence profile, pwaa[0..n0-1] vs ['A'] could have a different score in each position. */ if (ppst->pam_pssm) { pwaa_s = f_str->waa_s; pwaa_a = f_str->waa_a; for (e = 0; e <=nsq; e++) { /* for each residue in the alphabet */ for (f = 0; f < n0; f++) { /* for each position in aa0 */ *pwaa_s++ = f_str->pam2p[ip][f][e] = ppst->pam2p[ip][f][e]; *pwaa_a++ = f_str->pam2p[0][f][e] = ppst->pam2p[0][f][e]; } } } else { /* initialize scanning matrix */ pwaa_s = f_str->waa_s; pwaa_a = f_str->waa_a; for (e = 0; e <=nsq; e++) /* for each residue in the alphabet */ for (f = 0; f < n0; f++) { /* for each position in aa0 */ *pwaa_s++ = f_str->pam2p[ip][f][e]= ppst->pam2[ip][e][aa0[f]]; *pwaa_a++ = f_str->pam2p[0][f][e] = ppst->pam2[0][e][aa0[f]]; } } maxn0 = max(3*n0/2,MIN_RES); if ((res = (int *)calloc((size_t)maxn0,sizeof(int)))==NULL) { fprintf(stderr,"cannot allocate alignment results array %d\n",maxn0); exit(1); } f_str->res = res; *f_arg = f_str; } void close_work (const unsigned char *aa0, int n0, struct pstruct *ppst, struct f_struct **f_arg) { struct f_struct *f_str; f_str = *f_arg; if (f_str != NULL) { if (f_str->kar_p !=NULL) free(f_str->kar_p); f_str->ss--; free(f_str->ss); free(f_str->res); free(f_str->waa_a); free(f_str->pam2p[0][0]); free(f_str->pam2p[0]); free(f_str->waa_s); free(f_str->pam2p[1][0]); free(f_str->pam2p[1]); free(f_str); *f_arg = NULL; } } /* pstring1 is a message to the manager, currently 512 */ /*void get_param(struct pstruct *pstr,char *pstring1)*/ void get_param (struct pstruct *pstr, char *pstring1, char *pstring2) { char psi_str[120]; char *pg_str="Smith-Waterman"; if (pstr->pam_pssm) { strncpy(psi_str,"-PSI",sizeof(psi_str));} else { psi_str[0]='\0';} #ifdef OLD_FASTA_GAP sprintf (pstring1, " %s (%s) function [%s matrix%s (%d:%d)%s], gap-penalty: %d/%d", #else sprintf (pstring1, " %s (%s) function [%s matrix%s (%d:%d)%s], open/ext: %d/%d", #endif pg_str, verstr, pstr->pamfile, psi_str, pstr->pam_h,pstr->pam_l, (pstr->ext_sq_set)?"xS":"\0", pstr->gdelval, pstr->ggapval); if (pstring2 != NULL) { #ifdef OLD_FASTA_GAP sprintf(pstring2,"; pg_name: %s\n; pg_ver: %s\n; pg_matrix: %s (%d:%d)%s\n; pg_gap-pen: %d %d\n", #else sprintf(pstring2,"; pg_name: %s\n; pg_ver: %s\n; pg_matrix: %s (%d:%d)%s\n; pg_open-ext: %d %d\n", #endif pg_str,verstr,psi_str,pstr->pam_h,pstr->pam_l, (pstr->ext_sq_set)?"xS":"\0",pstr->gdelval,pstr->ggapval); } } void do_work (const unsigned char *aa0, int n0, const unsigned char *aa1, int n1, int frame, struct pstruct *ppst, struct f_struct *f_str, int qr_flg, struct rstruct *rst) { const unsigned char *aa0p, *aa1p; register struct swstr *ssj; struct swstr *ss, *f_ss, *r_ss; register int *pwaa; int *waa; register int i, j; int e, f, h, p; int q, r, m; int score; double lambda, H, K; rst->escore = 1.0; rst->segnum = rst->seglen = 1; waa = f_str->waa_s; ss = f_str->ss; f_ss = f_str->f_ss; r_ss = f_str->r_ss; #ifdef OLD_FASTA_GAP q = -(ppst->gdelval - ppst->ggapval); #else q = -ppst->gdelval; #endif r = -ppst->ggapval; m = q + r; /* initialize 0th row */ for (ssj=ss; ssj<&ss[n0]; ssj++) { ssj->H = 0; ssj->E = -q; } score = 0; aa1p = aa1; while (*aa1p) { h = p = 0; f = -q; pwaa = waa + (*aa1p++ * n0); for (ssj = ss, aa0p = aa0; ssj < ss+n0; ssj++) { if ((h = h - m) > (f = f - r)) f = h; if ((h = ssj->H - m) > (e = ssj->E - r)) e = h; h = p + *pwaa++; if (h < 0 ) h = 0; if (h < f ) h = f; if (h < e ) h = e; p = ssj->H; ssj->H = h; ssj->E = e; if (h > score) score = h; } } /* done with forward pass */ rst->score[0] = score; if(ppst->zsflag == 6 || ppst->zsflag == 16 && (do_karlin(aa1, n1, ppst->pam2[0], ppst,f_str->aa0_f, f_str->kar_p, &lambda, &H)>0)) { rst->comp = 1.0/lambda; rst->H = H; } else {rst->comp = rst->H = -1.0;} } /* here we should be all done */ void do_opt (const unsigned char *aa0, int n0, const unsigned char *aa1, int n1, int frame, struct pstruct *pst, struct f_struct *f_str, struct rstruct *rstr) { } int do_walign (const unsigned char *aa0, const int n0, const unsigned char *aa1, const int n1, int frame, struct pstruct *ppst, struct f_struct *f_str, struct a_res_str *a_res, int *have_ares ) { const unsigned char *aa0p, *aa1p; register int *pwaa; register int i, j; register struct swstr *ssj; struct swstr *f_ss, *r_ss, *ss; int *res, *waa; int e, f, h, p; int q, r, m; int score; int cost, I, J, K, L; ss = f_str->ss; res = f_str->res; waa = f_str->waa_a; /* this time use universal pam2[0] */ #ifdef OLD_FASTA_GAP q = -(ppst->gdelval - ppst->ggapval); #else q = -ppst->gdelval; #endif r = -ppst->ggapval; m = q + r; /* initialize 0th row */ for (ssj=ss; ssjH = 0; ssj->E = -q; } score = 0; aa1p = aa1; i = 0; while (*aa1p) { h = p = 0; f = -q; pwaa = waa + (*aa1p++ * n0); for (ssj = ss, aa0p = aa0; ssj < ss+n0; ssj++) { if ((h = h - m) > /* gap open from left best */ /* gap extend from left gapped */ (f = f - r)) f = h; /* if better, use new gap opened */ if ((h = ssj->H - m) > /* gap open from up best */ /* gap extend from up gap */ (e = ssj->E - r)) e = h; /* if better, use new gap opened */ h = p + *pwaa++; /* diagonal match */ if (h < 0 ) h = 0; /* ? < 0, reset to 0 */ if (h < f ) h = f; /* left gap better, reset */ if (h < e ) h = e; /* up gap better, reset */ p = ssj->H; /* save previous best score */ ssj->H = h; /* save (new) up diag-matched */ ssj->E = e; /* save upper gap opened */ if (h > score) { /* ? new best score */ score = h; /* save best */ I = i; /* row */ J = (int)(ssj-ss); /* column */ } } i++; } /* done with forward pass */ if (score <= 0) return 0; /* to get the start point, go backwards */ /* 18-June-2003 fix bug in backtracking code to identify start of alignment. Code used pam2[0][aa0[j]][aa1[i]] instead of pam2p[0][j][aa1[i]]. Ideally, it would use waa_a. */ cost = K = L = 0; for (ssj=ss+J; ssj>=ss; ssj--) ssj->H= ssj->E= -1; for (i=I; i>=0; i--) { h = f = -1; p = (i == I) ? 0 : -1; for (ssj=ss+J, j= J; ssj>=ss; ssj--,j--) { f = max (f,h-q)-r; ssj->E=max(ssj->E,ssj->H-q)-r; h = max(max(ssj->E,f),p+f_str->pam2p[0][j][aa1[i]]); p = ssj->H; ssj->H=h; if (h > cost) { cost = h; K = i; L = (int)(ssj-ss); if (cost >= score) goto found; } } } found: /* printf(" %d: L: %3d-%3d/%3d; K: %3d-%3d/%3d\n",score,L,J,n0,K,I,n1); */ /* in the f_str version, the *res array is already allocated at 4*n0/3 */ a_res->res = f_str->res; *have_ares = 1; a_res->max0 = J+1; a_res->min0 = L; a_res->max1 = I+1; a_res->min1 = K; /* ALIGN(&aa1[K-1],&aa0[L-1],I-K+1,J-L+1,ppst->pam2[0],q,r,res,nres,f_str); */ /* this code no longer refers to aa0[], it used pam2p[0][L] instead */ ALIGN(&aa0[L-1],&aa1[K-1],J-L+1,I-K+1,f_str->pam2p[0],L,q,r, a_res->res,&a_res->nres,f_str); /* DISPLAY(&aa0[L-1],&aa1[K-1],J-L+1,I-K+1,res,L,K,ppst->sq); */ return score; } static int CHECK_SCORE(const unsigned char *A, const unsigned char *B, int M, int N, int *S, int **W, int IW, int G, int H, int *nres); #define gap(k) ((k) <= 0 ? 0 : g+h*(k)) /* k-symbol indel cost */ /* Append "Delete k" op */ #define DEL(k) \ { if (*last < 0) \ *last = (*sapp)[-1] -= (k); \ else { \ *last = (*sapp)[0] = -(k); \ (*sapp)++; \ } \ } /* Append "Insert k" op */ #define INS(k) \ { if (*last > 0) \ *last = (*sapp)[-1] += (k); \ else { \ *last = (*sapp)[0] = (k); \ (*sapp)++; \ } \ } #define REP { *last = (*sapp)[0] = 0; (*sapp)++; } /* Append "Replace" op */ /* #define XTERNAL #include "upam.h" void print_seq_prof(unsigned char *A, int M, unsigned char *B, int N, int **w, int iw, int dir) { char c_max; int i_max, j_max, i,j; char *c_dir="LRlr"; for (i=1; i<=min(60,M); i++) { fprintf(stderr,"%c",aa[A[i]]); } fprintf(stderr, - %d\n,M); for (i=0; i i_max) { i_max = w[iw+i][j]; j_max = j; } } fprintf(stderr,"%c",aa[j_max]); } fputc(':',stderr); for (i=1; i<=min(60,N); i++) { fprintf(stderr,"%c",aa[B[i]]); } fprintf(stderr," -%c: %d,%d\n",c_dir[dir],M,N); } */ /* align(A,B,M,N,tb,te) returns the cost of an optimum conversion between A[1..M] and B[1..N] that begins(ends) with a delete if tb(te) is zero and appends such a conversion to the current script. */ static int align(const unsigned char *A, const unsigned char *B, int M, int N, int tb, int te, int **w, int iw, int g, int h, struct f_struct *f_str, int dir, int **sapp, int *last) { int midi, midj, type; /* Midpoint, type, and cost */ int midc; int c1, c2; { register int i, j; register int c, e, d, s; int m, t, *wa; struct swstr *f_ss, *r_ss; /* print_seq_prof(A,M,B,N,w,iw,dir); */ m = g + h; f_ss = f_str->f_ss; r_ss = f_str->r_ss; /* Boundary cases: M <= 1 or N == 0 */ if (N <= 0) { if (M > 0) { DEL(M) } return -gap(M); } if (M <= 1) { if (M <= 0){ INS(N) return -gap(N); } if (tb < te) tb = te; midc = (tb-h) - gap(N); midj = 0; /* wa = w[A[1]]; */ wa = w[iw]; for (j = 1; j <= N; j++) { c = -gap(j-1) + wa[B[j]] - gap(N-j); if (c > midc) { midc = c; midj = j;} } if (midj == 0) { DEL(1) INS(N) } else { if (midj > 1) { INS(midj-1)} REP if (midj < N) { INS(N-midj)} } return midc; } /* Divide: Find optimum midpoint (midi,midj) of cost midc */ midi = M/2; /* Forward phase: */ f_ss[0].H = 0; /* Compute H(M/2,k) & E(M/2,k) for all k */ t = -g; for (j = 1; j <= N; j++) { f_ss[j].H = t = t-h; f_ss[j].E = t-g; } t = tb; for (i = 1; i <= midi; i++) { s = f_ss[0].H; f_ss[0].H = c = t = t-h; e = t-g; /* wa = w[A[i]]; */ wa = w[iw+i-1]; for (j = 1; j <= N; j++) { if ((c = c - m) > (e = e - h)) e = c; if ((c = f_ss[j].H - m) > (d = f_ss[j].E - h)) d = c; c = s + wa[B[j]]; if (e > c) c = e; if (d > c) c = d; s = f_ss[j].H; f_ss[j].H = c; f_ss[j].E = d; } } f_ss[0].E = f_ss[0].H; r_ss[N].H = 0; /* Reverse phase: */ t = -g; /* Compute R(M/2,k) & S(M/2,k) for all k */ for (j = N-1; j >= 0; j--) { r_ss[j].H = t = t-h; r_ss[j].E = t-g; } t = te; for (i = M-1; i >= midi; i--) { s = r_ss[N].H; r_ss[N].H = c = t = t-h; e = t-g; /* wa = w[A[i+1]]; */ wa = w[iw+i]; for (j = N-1; j >= 0; j--) { if ((c = c - m) > (e = e - h)) e = c; if ((c = r_ss[j].H - m) > (d = r_ss[j].E - h)) d = c; c = s + wa[B[j+1]]; if (e > c) c = e; if (d > c) c = d; s = r_ss[j].H; r_ss[j].H = c; r_ss[j].E = d; } } r_ss[N].E = r_ss[N].H; midc = f_ss[0].H+r_ss[0].H; /* Find optimal midpoint */ midj = 0; type = 1; for (j = 0; j <= N; j++) if ((c = f_ss[j].H + r_ss[j].H) >= midc) if (c > midc || f_ss[j].H != f_ss[j].E && r_ss[j].H == r_ss[j].E) { midc = c; midj = j; } for (j = N; j >= 0; j--) if ((c = f_ss[j].E + r_ss[j].E + g) > midc) { midc = c; midj = j; type = 2; } } /* Conquer: recursively around midpoint */ if (type == 1) { c1 = align(A,B,midi,midj,tb,-g,w,iw,g,h,f_str,0, sapp, last); c2 = align(A+midi,B+midj,M-midi,N-midj,-g,te,w,iw+midi,g,h,f_str,1,sapp,last); } else { align(A,B,midi-1,midj,tb,0,w,iw,g,h,f_str,2,sapp, last); DEL(2); align(A+midi+1,B+midj,M-midi-1,N-midj,0,te,w,iw+midi+1,g,h,f_str,3,sapp,last); } return midc; } /* Interface and top level of comparator */ int ALIGN(const unsigned char *A, const unsigned char *B, int M, int N, int **W, int IW, int G, int H, int *S, int *NC, struct f_struct *f_str) { struct swstr *f_ss, *r_ss; int *sapp, last; int c, ck; sapp = S; last = 0; if ((f_ss = (struct swstr *) calloc (N+2, sizeof (struct swstr))) == NULL) { fprintf (stderr, "cannot allocate f_ss array %3d\n", N+2); exit (1); } f_ss++; f_str->f_ss = f_ss; if ((r_ss = (struct swstr *) calloc (N+2, sizeof (struct swstr))) == NULL) { fprintf (stderr, "cannot allocate r_ss array %3d\n", N+2); exit (1); } r_ss++; f_str->r_ss = r_ss; /* print_seq_prof(A,M,W,IW); */ c = align(A,B,M,N,-G,-G,W,IW,G,H,f_str,0,&sapp, &last); /* OK, do it */ ck = CHECK_SCORE(A,B,M,N,S,W,IW,G,H,NC); if (c != ck) printf("Check_score error. %d != %d\n",c,ck); f_ss--; r_ss--; free(r_ss); free(f_ss); return c; } /* Alignment display routine */ static char ALINE[51], BLINE[51], CLINE[51]; void DISPLAY(unsigned char *A, unsigned char *B, int M, int N, int *S, int AP, int BP, char *sq) { register char *a, *b, *c; register int i, j, op; int lines, ap, bp; i = j = op = lines = 0; ap = AP; bp = BP; a = ALINE; b = BLINE; c = CLINE; while (i < M || j < N) { if (op == 0 && *S == 0) { op = *S++; *a = sq[A[++i]]; *b = sq[B[++j]]; *c++ = (*a++ == *b++) ? '|' : ' '; } else { if (op == 0) op = *S++; if (op > 0) { *a++ = ' '; *b++ = sq[B[++j]]; op--; } else { *a++ = sq[A[++i]]; *b++ = ' '; op++; } *c++ = '-'; } if (a >= ALINE+50 || i >= M && j >= N) { *a = *b = *c = '\0'; printf("\n%5d ",50*lines++); for (b = ALINE+10; b <= a; b += 10) printf(" . :"); if (b <= a+5) printf(" ."); printf("\n%5d %s\n %s\n%5d %s\n",ap,ALINE,CLINE,bp,BLINE); ap = AP + i; bp = BP + j; a = ALINE; b = BLINE; c = CLINE; } } } /* CHECK_SCORE - return the score of the alignment stored in S */ static int CHECK_SCORE(const unsigned char *A, const unsigned char *B, int M, int N, int *S, int **w, int iw, int g, int h, int *NC) { register int i, j, op, nc; int score; /* print_seq_prof(A,M,w,iw); */ score = i = j = op = nc = 0; while (i < M || j < N) { op = *S++; if (op == 0) { score = w[iw+i][B[++j]] + score; i++; nc++; } else if (op > 0) { score = score - (g+op*h); j += op; nc += op; } else { score = score - (g-op*h); i -= op; nc -= op; } } *NC = nc; return score; } void pre_cons(const unsigned char *aa1, int n1, int frame, struct f_struct *f_str) {} /* aln_func_vals - set up aln.qlfact, qlrev, llfact, llmult, frame, llrev */ /* call from calcons, calc_id, calc_code */ void aln_func_vals(int frame, struct a_struct *aln) { aln->llfact = aln->llmult = aln->qlfact = 1; aln->qlrev = aln->llrev = 0; aln->frame = 0; } /* 29-June-2003 this version has been modified to use pst.pam2p instead of pam2 to indicate similarity */ #include "a_mark.h" int calcons(const unsigned char *aa0, int n0, const unsigned char *aa1, int n1, int *nc, struct a_struct *aln, struct a_res_str a_res, struct pstruct pst, char *seqc0, char *seqc1, char *seqca, struct f_struct *f_str) { int i0, i1; int op, lenc, nd, ns, itmp; char *sp0, *sp1, *spa, *sq; int *rp; int mins, smins; if (pst.ext_sq_set) { sq = pst.sqx;} else {sq = pst.sq;} aln->amin0 = a_res.min0; aln->amax0 = a_res.max0; aln->amin1 = a_res.min1; aln->amax1 = a_res.max1; /* #define LFASTA */ #ifndef LFASTA if (min(a_res.min0,a_res.min1)llen || aln->showall==1) /* will we show all the start ?*/ if (a_res.min0>=a_res.min1) { /* aa0 extends more to left */ smins=0; if (aln->showall==1) mins=a_res.min0; else mins = min(a_res.min0,aln->llcntx); aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst); aln->smin0 = a_res.min0-mins; if ((mins-a_res.min1)>0) { memset(seqc1,' ',mins-a_res.min1); aancpy(seqc1+mins-a_res.min1,(char *)aa1,a_res.min1,pst); aln->smin1 = 0; } else { aancpy(seqc1,(char *)aa1+a_res.min1-mins,mins,pst); aln->smin1 = a_res.min1-mins; } } else { smins=0; if (aln->showall == 1) mins=a_res.min1; else mins = min(a_res.min1,aln->llcntx); aancpy(seqc1,(char *)(aa1+a_res.min1-mins),mins,pst); aln->smin1 = a_res.min1-mins; if ((mins-a_res.min0)>0) { memset(seqc0,' ',mins-a_res.min0); aancpy(seqc0+mins-a_res.min0,(char *)aa0,a_res.min0,pst); aln->smin0 = 0; } else { aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst); aln->smin0 = a_res.min0-mins; } } else { mins= min(aln->llcntx,min(a_res.min0,a_res.min1)); smins=mins; aln->smin0=a_res.min0; aln->smin1=a_res.min1; aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst); aancpy(seqc1,(char *)aa1+a_res.min1-mins,mins,pst); } #else aln->smin0 = a_res.min0; aln->smin1 = a_res.min1; smins = mins = 0; #endif /* now get the middle */ memset(seqca,M_BLANK,mins); spa = seqca+mins; sp0 = seqc0+mins; sp1 = seqc1+mins; rp = a_res.res; lenc = aln->nident = aln->nsim = aln->ngap_q = aln->ngap_l = aln->nfs =op = 0; i0 = a_res.min0; i1 = a_res.min1; while (i0 < a_res.max0 || i1 < a_res.max1) { if (op == 0 && *rp == 0) { op = *rp++; lenc++; if ((itmp=f_str->pam2p[0][i0][aa1[i1]])<0) { *spa = M_NEG; } else if (itmp == 0) { *spa = M_ZERO;} else {*spa = M_POS;} if (*spa == M_POS || *spa==M_ZERO) aln->nsim++; *sp0 = sq[aa0[i0++]]; *sp1 = sq[aa1[i1++]]; if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;} else if (pst.dnaseq==1 && ((*sp0 == 'T' && *sp1 == 'U') || (*sp0=='U' && *sp1=='T'))) { aln->nident++; *spa=M_IDENT; } sp0++; sp1++; spa++; } else { if (op==0) op = *rp++; if (op>0) { *sp0++ = '-'; *sp1++ = sq[aa1[i1++]]; *spa++ = M_DEL; op--; lenc++; aln->ngap_q++; } else { *sp0++ = sq[aa0[i0++]]; *sp1++ = '-'; *spa++ = M_DEL; op++; lenc++; aln->ngap_l++; } } } *nc = lenc; *spa = '\0'; /* now we have the middle, get the right end */ #ifndef LFASTA /* how much extra to show at end ? */ if (!aln->llcntx_flg) { ns = mins + lenc + aln->llen; /* show an extra line? */ ns -= (itmp = ns %aln->llen); /* itmp = left over on last line */ if (itmp>aln->llen/2) ns += aln->llen; /* more than 1/2 , use another*/ nd = ns - (mins+lenc); /* this much extra */ } else nd = aln->llcntx; if (nd > max(n0-a_res.max0,n1-a_res.max1)) nd = max(n0-a_res.max0,n1-a_res.max1); if (aln->showall==1) { nd = max(n0-a_res.max0,n1-a_res.max1); /* reset for showall=1 */ /* get right end */ aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,n0-a_res.max0,pst); aancpy(seqc1+mins+lenc,(char *)aa1+a_res.max1,n1-a_res.max1,pst); /* fill with blanks - this is required to use one 'nc' */ memset(seqc0+mins+lenc+n0-a_res.max0,' ',nd-(n0-a_res.max0)); memset(seqc1+mins+lenc+n1-a_res.max1,' ',nd-(n1-a_res.max1)); } else { if ((nd-(n0-a_res.max0))>0) { aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,n0-a_res.max0,pst); memset(seqc0+mins+lenc+n0-a_res.max0,' ',nd-(n0-a_res.max0)); } else aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,nd,pst); if ((nd-(n1-a_res.max1))>0) { aancpy(seqc1+mins+lenc,(char *)aa1+a_res.max1,n1-a_res.max1,pst); memset(seqc1+mins+lenc+n1-a_res.max1,' ',nd-(n1-a_res.max1)); } else aancpy(seqc1+mins+lenc,(char *)aa1+a_res.max1,nd,pst); } #else /* LFASTA */ nd = 0; #endif /* #undef LFASTA */ return mins+lenc+nd; } int calcons_a(const unsigned char *aa0, unsigned char *aa0a, int n0, const unsigned char *aa1, int n1, int *nc, struct a_struct *aln, struct a_res_str a_res, struct pstruct pst, char *seqc0, char *seqc0a, char *seqc1, char *seqca, char *ann_arr, struct f_struct *f_str) { int i0, i1; int op, lenc, nd, ns, itmp; char *sp0, *sp0a, *sp1, *spa, *sq; int *rp; int mins, smins; if (pst.ext_sq_set) {sq = pst.sqx;} else {sq = pst.sq;} aln->amin0 = a_res.min0; aln->amax0 = a_res.max0; aln->amin1 = a_res.min1; aln->amax1 = a_res.max1; /* first fill in the ends */ /* #define LFASTA */ #ifndef LFASTA if (min(a_res.min0,a_res.min1)llen || aln->showall==1) /* will we show all the start ?*/ if (a_res.min0>=a_res.min1) { /* aa0 extends more to left */ smins=0; if (aln->showall==1) mins=a_res.min0; else mins = min(a_res.min0,aln->llcntx); aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst); aln->smin0 = a_res.min0-mins; if ((mins-a_res.min1)>0) { memset(seqc1,' ',mins-a_res.min1); aancpy(seqc1+mins-a_res.min1,(char *)aa1,a_res.min1,pst); aln->smin1 = 0; } else { aancpy(seqc1,(char *)aa1+a_res.min1-mins,mins,pst); aln->smin1 = a_res.min1-mins; } } else { smins=0; if (aln->showall == 1) mins=a_res.min1; else mins = min(a_res.min1,aln->llcntx); aancpy(seqc1,(char *)(aa1+a_res.min1-mins),mins,pst); aln->smin1 = a_res.min1-mins; if ((mins-a_res.min0)>0) { memset(seqc0,' ',mins-a_res.min0); aancpy(seqc0+mins-a_res.min0,(char *)aa0,a_res.min0,pst); aln->smin0 = 0; } else { aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst); aln->smin0 = a_res.min0-mins; } } else { mins= min(aln->llcntx,min(a_res.min0,a_res.min1)); smins=mins; aln->smin0=a_res.min0; aln->smin1=a_res.min1; aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst); aancpy(seqc1,(char *)aa1+a_res.min1-mins,mins,pst); } #else aln->smin0 = a_res.min0; aln->smin1 = a_res.min1; smins = mins = 0; #endif /* now get the middle */ memset(seqca,M_BLANK,mins); memset(seqc0a,' ',mins); spa = seqca+mins; sp0 = seqc0+mins; sp0a = seqc0a+mins; sp1 = seqc1+mins; rp = a_res.res; lenc = aln->nident = aln->nsim = aln->ngap_q = aln->ngap_l = aln->nfs =op = 0; i0 = a_res.min0; i1 = a_res.min1; while (i0 < a_res.max0 || i1 < a_res.max1) { if (op == 0 && *rp == 0) { op = *rp++; lenc++; if ((itmp=f_str->pam2p[0][i0][aa1[i1]])<0) { *spa = M_NEG; } else if (itmp == 0) { *spa = M_ZERO;} else {*spa = M_POS;} if (*spa == M_POS || *spa==M_ZERO) aln->nsim++; *sp0a++ = ann_arr[aa0a[i0]]; *sp0 = sq[aa0[i0++]]; *sp1 = sq[aa1[i1++]]; if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;} else if (pst.dnaseq==1 && ((*sp0 == 'T' && *sp1 == 'U') || (*sp0=='U' && *sp1=='T'))) { aln->nident++; *spa=M_IDENT; } sp0++; sp1++; spa++; } else { if (op==0) op = *rp++; if (op>0) { *sp0++ = '-'; *sp0a++ = ' '; *sp1++ = sq[aa1[i1++]]; *spa++ = M_DEL; op--; lenc++; aln->ngap_q++; } else { *sp0a++ = ann_arr[aa0a[i0]]; *sp0++ = sq[aa0[i0++]]; *sp1++ = '-'; *spa++ = M_DEL; op++; lenc++; aln->ngap_l++; } } } *nc = lenc; *spa = '\0'; /* now we have the middle, get the right end */ #ifndef LFASTA /* how much extra to show at end ? */ if (!aln->llcntx_flg) { ns = mins + lenc + aln->llen; /* show an extra line? */ ns -= (itmp = ns %aln->llen); /* itmp = left over on last line */ if (itmp>aln->llen/2) ns += aln->llen; /* more than 1/2 , use another*/ nd = ns - (mins+lenc); /* this much extra */ } else nd = aln->llcntx; if (nd > max(n0-a_res.max0,n1-a_res.max1)) nd = max(n0-a_res.max0,n1-a_res.max1); if (aln->showall==1) { nd = max(n0-a_res.max0,n1-a_res.max1); /* reset for showall=1 */ /* get right end */ aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,n0-a_res.max0,pst); aancpy(seqc1+mins+lenc,(char *)aa1+a_res.max1,n1-a_res.max1,pst); /* fill with blanks - this is required to use one 'nc' */ memset(seqc0+mins+lenc+n0-a_res.max0,' ',nd-(n0-a_res.max0)); memset(seqc1+mins+lenc+n1-a_res.max1,' ',nd-(n1-a_res.max1)); } else { if ((nd-(n0-a_res.max0))>0) { aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,n0-a_res.max0,pst); memset(seqc0+mins+lenc+n0-a_res.max0,' ',nd-(n0-a_res.max0)); } else aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,nd,pst); if ((nd-(n1-a_res.max1))>0) { aancpy(seqc1+mins+lenc,(char *)aa1+a_res.max1,n1-a_res.max1,pst); memset(seqc1+mins+lenc+n1-a_res.max1,' ',nd-(n1-a_res.max1)); } else aancpy(seqc1+mins+lenc,(char *)aa1+a_res.max1,nd,pst); } #else /* LFASTA */ nd = 0; #endif /* #undef LFASTA */ return mins+lenc+nd; } static void update_code(char *al_str, int al_str_max, int op, int op_cnt); /* build an array of match/ins/del - length strings */ int calc_code(const unsigned char *aa0, const int n0, const unsigned char *aa1, const int n1, struct a_struct *aln, struct a_res_str a_res, struct pstruct pst, char *al_str, int al_str_n, struct f_struct *f_str) { int i0, i1, nn1; int op, lenc, nd, ns, itmp; int p_op, op_cnt; const unsigned char *aa1p; char tmp_cnt[20]; char sp0, sp1, *sq; int *rp; int mins, smins; if (pst.ext_sq_set) { sq = pst.sqx; } else { sq = pst.sq; } #ifndef TFASTA aa1p = aa1; nn1 = n1; #else aa1p = f_str->aa1x; nn1 = f_str->n10; #endif aln->amin0 = a_res.min0; aln->amax0 = a_res.max0; aln->amin1 = a_res.min1; aln->amax1 = a_res.max1; rp = a_res.res; lenc = aln->nident = aln->nsim = aln->ngap_q = aln->ngap_l = aln->nfs = op = p_op = 0; op_cnt = 0; i0 = a_res.min0; i1 = a_res.min1; tmp_cnt[0]='\0'; while (i0 < a_res.max0 || i1 < a_res.max1) { if (op == 0 && *rp == 0) { if (pst.pam2[0][aa0[i0]][aa1p[i1]]>=0) { aln->nsim++;} sp0 = sq[aa0[i0++]]; sp1 = sq[aa1p[i1++]]; if (p_op == 0 || p_op==3) { if (sp0 != '*' && sp1 != '*') { if (p_op == 3) { update_code(al_str,al_str_n-strlen(al_str),p_op,op_cnt); op_cnt = 1; p_op = 0; } else {op_cnt++;} } else { update_code(al_str,al_str_n-strlen(al_str),p_op,op_cnt); op_cnt = 1; p_op = 3; } } else { update_code(al_str,al_str_n-strlen(al_str),p_op,op_cnt); op_cnt = 1; p_op = 0; } op = *rp++; lenc++; if (toupper(sp0) == toupper(sp1)) aln->nident++; else if (pst.dnaseq==1) { if ((toupper(sp0) == 'T' && toupper(sp1) == 'U') || (toupper(sp0)=='U' && toupper(sp1)=='T')) aln->nident++; else if (toupper(sp0) == 'N') aln->ngap_q++; else if (toupper(sp1) == 'N') aln->ngap_l++; } } else { if (op==0) op = *rp++; if (op>0) { if (p_op == 1) { op_cnt++;} else { update_code(al_str,al_str_n - strlen(al_str),p_op,op_cnt); op_cnt = 1; p_op = 1; } op--; lenc++; i1++; aln->ngap_q++; } else { if (p_op == 2) { op_cnt++;} else { update_code(al_str,al_str_n - strlen(al_str),p_op,op_cnt); op_cnt = 1; p_op = 2; } op++; lenc++; i0++; aln->ngap_l++; } } } update_code(al_str,al_str_n - strlen(al_str),p_op,op_cnt); return lenc; } static void update_code(char *al_str, int al_str_max, int op, int op_cnt) { char op_char[5]={"=-+*"}; char tmp_cnt[20]; sprintf(tmp_cnt,"%c%d",op_char[op],op_cnt); strncat(al_str,tmp_cnt,al_str_max); } int calc_id(const unsigned char *aa0, const int n0, const unsigned char *aa1, const int n1, struct a_struct *aln, struct a_res_str a_res, struct pstruct pst, struct f_struct *f_str) { int i0, i1, nn1, n_id; int op, lenc, nd, ns, itmp; int sp0, sp1; const unsigned char *aa1p; int *rp; char *sq; if (pst.ext_sq_set) { sq = pst.sqx; } else { sq = pst.sq; } #ifndef TFASTA aa1p = aa1; nn1 = n1; #else aa1p = f_str->aa1x; nn1 = f_str->n10; #endif rp = a_res.res; lenc = n_id = aln->nsim = aln->ngap_q = aln->ngap_l = aln->nfs = op = 0; i0 = a_res.min0; i1 = a_res.min1; while (i0 < a_res.max0 || i1 < a_res.max1) { if (op == 0 && *rp == 0) { op = *rp++; lenc++; if (pst.pam2[0][aa0[i0]][aa1p[i1]]>=0) { aln->nsim++;} sp0 = sq[aa0[i0++]]; sp1 = sq[aa1p[i1++]]; if (toupper(sp0) == toupper(sp1)) n_id++; else if (pst.dnaseq==1 && ((sp0=='T' && sp1== 'U')||(sp0=='U' && sp1=='T'))) n_id++; } else { if (op==0) op = *rp++; if (op>0) {op--; lenc++; i1++; aln->ngap_q++; } else {op++; lenc++; i0++; aln->ngap_l++; } } } aln->nident = n_id; return lenc; } #ifdef PCOMPLIB #include "p_mw.h" void update_params(struct qmng_str *qm_msg, struct pstruct *ppst) { ppst->n0 = qm_msg->n0; } #endif