1 /* copyright (c) 1994, 1995, 1996 William R. Pearson */
3 /* $Name: fa_34_26_5 $ - $Id: dropnsw.c,v 1.35 2006/10/19 14:49:14 wrp Exp $ */
6 this is a slower version of dropgsw.c that implements the Smith-Waterman
7 algorithm. It lacks the shortcuts in dropgsw.c that prevent scores less
8 than the penalty for the first residue in a gap from being generated.
10 Thus, dropnsw.c should be used for tests with very large gap penalties,
11 and is more appropriate for programs like prss3, which are interested
12 in accurate low scores.
15 /* the do_walign() code in this file is not thread_safe */
16 /* init_work(), do_work(), are thread safe */
27 static char *verstr="3.5 Sept 2006";
29 struct swstr { int H, E;};
43 #include "drop_func.h"
45 extern int do_karlin(const unsigned char *aa1, int n1,
46 int **pam2, struct pstruct *ppst,
47 double *aa0_f, double *kar_p, double *lambda, double *H);
48 extern void aancpy(char *to, char *from, int count, struct pstruct pst);
49 int ALIGN(const unsigned char *A, const unsigned char *B, int M, int N,
50 int **W, int IW, int G, int H, int *S, int *NC,
51 struct f_struct *f_str);
53 /* initialize for Smith-Waterman optimal score */
55 void init_work (unsigned char *aa0, int n0,
57 struct f_struct **f_arg)
63 struct f_struct *f_str;
65 struct swstr *ss, *f_ss, *r_ss;
68 if (ppst->ext_sq_set) {
69 nsq = ppst->nsqx; ip = 1;
72 nsq = ppst->nsq; ip = 0;
75 f_str = (struct f_struct *)calloc(1,sizeof(struct f_struct));
77 /* allocate space for the scoring arrays */
79 if ((ss = (struct swstr *) calloc (maxn0, sizeof (struct swstr)))
81 fprintf (stderr, "cannot allocate ss array %3d\n", n0);
87 if ((f_ss = (struct swstr *) calloc (maxn0, sizeof (struct swstr)))
89 fprintf (stderr, "cannot allocate f_ss array %3d\n", n0);
95 if ((r_ss = (struct swstr *) calloc (n0+2, sizeof (struct swstr)))
97 fprintf (stderr, "cannot allocate r_ss array %3d\n", n0);
103 /* initialize variable (-S) pam matrix */
104 if ((f_str->waa_s= (int *)calloc((nsq+1)*(n0+1),sizeof(int))) == NULL) {
105 fprintf(stderr,"cannot allocate waa_s array %3d\n",nsq*n0);
109 if ((f_str->pam2p[1]= (int **)calloc((n0+1),sizeof(int *))) == NULL) {
110 fprintf(stderr,"cannot allocate pam2p[1] array %3d\n",n0);
114 pam2p = f_str->pam2p[1];
115 if ((pam2p[0]=(int *)calloc((nsq+1)*(n0+1),sizeof(int))) == NULL) {
116 fprintf(stderr,"cannot allocate pam2p[1][] array %3d\n",nsq*n0);
120 for (i=1; i<n0; i++) {
121 pam2p[i]= pam2p[0] + (i*(nsq+1));
124 /* initialize universal (alignment) matrix */
125 if ((f_str->waa_a= (int *)calloc((nsq+1)*(n0+1),sizeof(int))) == NULL) {
126 fprintf(stderr,"cannot allocate waa_a struct %3d\n",nsq*n0);
130 if ((f_str->pam2p[0]= (int **)calloc((n0+1),sizeof(int *))) == NULL) {
131 fprintf(stderr,"cannot allocate pam2p[1] array %3d\n",n0);
135 pam2p = f_str->pam2p[0];
136 if ((pam2p[0]=(int *)calloc((nsq+1)*(n0+1),sizeof(int))) == NULL) {
137 fprintf(stderr,"cannot allocate pam2p[1][] array %3d\n",nsq*n0);
141 for (i=1; i<n0; i++) {
142 pam2p[i]= pam2p[0] + (i*(nsq+1));
146 pwaa effectively has a sequence profile --
147 pwaa[0..n0-1] has pam score for residue 0 (-BIGNUM)
148 pwaa[n0..2n0-1] has pam scores for residue 1 (A)
149 pwaa[2n0..3n-1] has pam scores for residue 2 (R), ...
151 thus: pwaa = f_str->waa_s + (*aa1p++)*n0; sets up pwaa so that
152 *pwaa++ rapidly moves though the scores of the aa1p[] position
153 without further indexing
155 For a real sequence profile, pwaa[0..n0-1] vs ['A'] could have
156 a different score in each position.
159 if (ppst->pam_pssm) {
160 pwaa_s = f_str->waa_s;
161 pwaa_a = f_str->waa_a;
162 for (e = 0; e <=nsq; e++) { /* for each residue in the alphabet */
163 for (f = 0; f < n0; f++) { /* for each position in aa0 */
164 *pwaa_s++ = f_str->pam2p[ip][f][e] = ppst->pam2p[ip][f][e];
165 *pwaa_a++ = f_str->pam2p[0][f][e] = ppst->pam2p[0][f][e];
169 else { /* initialize scanning matrix */
170 pwaa_s = f_str->waa_s;
171 pwaa_a = f_str->waa_a;
172 for (e = 0; e <=nsq; e++) /* for each residue in the alphabet */
173 for (f = 0; f < n0; f++) { /* for each position in aa0 */
174 *pwaa_s++ = f_str->pam2p[ip][f][e]= ppst->pam2[ip][e][aa0[f]];
175 *pwaa_a++ = f_str->pam2p[0][f][e] = ppst->pam2[0][e][aa0[f]];
179 maxn0 = max(3*n0/2,MIN_RES);
180 if ((res = (int *)calloc((size_t)maxn0,sizeof(int)))==NULL) {
181 fprintf(stderr,"cannot allocate alignment results array %d\n",maxn0);
189 void close_work (const unsigned char *aa0, int n0,
190 struct pstruct *ppst, struct f_struct **f_arg)
192 struct f_struct *f_str;
197 if (f_str->kar_p !=NULL) free(f_str->kar_p);
202 free(f_str->pam2p[0][0]);
203 free(f_str->pam2p[0]);
205 free(f_str->pam2p[1][0]);
206 free(f_str->pam2p[1]);
214 /* pstring1 is a message to the manager, currently 512 */
215 /*void get_param(struct pstruct *pstr,char *pstring1)*/
216 void get_param (struct pstruct *pstr, char *pstring1, char *pstring2)
220 char *pg_str="Smith-Waterman";
222 if (pstr->pam_pssm) { strncpy(psi_str,"-PSI",sizeof(psi_str));}
223 else { psi_str[0]='\0';}
226 sprintf (pstring1, " %s (%s) function [%s matrix%s (%d:%d)%s], gap-penalty: %d/%d",
228 sprintf (pstring1, " %s (%s) function [%s matrix%s (%d:%d)%s], open/ext: %d/%d",
230 pg_str, verstr, pstr->pamfile, psi_str, pstr->pam_h,pstr->pam_l,
231 (pstr->ext_sq_set)?"xS":"\0", pstr->gdelval, pstr->ggapval);
233 if (pstring2 != NULL) {
235 sprintf(pstring2,"; pg_name: %s\n; pg_ver: %s\n; pg_matrix: %s (%d:%d)%s\n; pg_gap-pen: %d %d\n",
237 sprintf(pstring2,"; pg_name: %s\n; pg_ver: %s\n; pg_matrix: %s (%d:%d)%s\n; pg_open-ext: %d %d\n",
239 pg_str,verstr,psi_str,pstr->pam_h,pstr->pam_l,
240 (pstr->ext_sq_set)?"xS":"\0",pstr->gdelval,pstr->ggapval);
245 void do_work (const unsigned char *aa0, int n0,
246 const unsigned char *aa1, int n1,
248 struct pstruct *ppst, struct f_struct *f_str,
252 const unsigned char *aa0p, *aa1p;
253 register struct swstr *ssj;
254 struct swstr *ss, *f_ss, *r_ss;
265 rst->segnum = rst->seglen = 1;
273 q = -(ppst->gdelval - ppst->ggapval);
280 /* initialize 0th row */
281 for (ssj=ss; ssj<&ss[n0]; ssj++) {
291 pwaa = waa + (*aa1p++ * n0);
292 for (ssj = ss, aa0p = aa0; ssj < ss+n0; ssj++) {
293 if ((h = h - m) > (f = f - r)) f = h;
294 if ((h = ssj->H - m) > (e = ssj->E - r)) e = h;
302 if (h > score) score = h;
304 } /* done with forward pass */
306 rst->score[0] = score;
308 if(ppst->zsflag == 6 || ppst->zsflag == 16 &&
309 (do_karlin(aa1, n1, ppst->pam2[0], ppst,f_str->aa0_f,
310 f_str->kar_p, &lambda, &H)>0)) {
311 rst->comp = 1.0/lambda;
314 else {rst->comp = rst->H = -1.0;}
315 } /* here we should be all done */
317 void do_opt (const unsigned char *aa0, int n0,
318 const unsigned char *aa1, int n1,
320 struct pstruct *pst, struct f_struct *f_str,
321 struct rstruct *rstr)
325 int do_walign (const unsigned char *aa0, const int n0,
326 const unsigned char *aa1, const int n1,
328 struct pstruct *ppst,
329 struct f_struct *f_str,
330 struct a_res_str *a_res,
333 const unsigned char *aa0p, *aa1p;
336 register struct swstr *ssj;
337 struct swstr *f_ss, *r_ss, *ss;
342 int cost, I, J, K, L;
347 waa = f_str->waa_a; /* this time use universal pam2[0] */
350 q = -(ppst->gdelval - ppst->ggapval);
358 /* initialize 0th row */
359 for (ssj=ss; ssj<ss+n0; ssj++) {
370 pwaa = waa + (*aa1p++ * n0);
371 for (ssj = ss, aa0p = aa0; ssj < ss+n0; ssj++) {
372 if ((h = h - m) > /* gap open from left best */
373 /* gap extend from left gapped */
374 (f = f - r)) f = h; /* if better, use new gap opened */
375 if ((h = ssj->H - m) > /* gap open from up best */
376 /* gap extend from up gap */
377 (e = ssj->E - r)) e = h; /* if better, use new gap opened */
378 h = p + *pwaa++; /* diagonal match */
379 if (h < 0 ) h = 0; /* ? < 0, reset to 0 */
380 if (h < f ) h = f; /* left gap better, reset */
381 if (h < e ) h = e; /* up gap better, reset */
382 p = ssj->H; /* save previous best score */
383 ssj->H = h; /* save (new) up diag-matched */
384 ssj->E = e; /* save upper gap opened */
385 if (h > score) { /* ? new best score */
386 score = h; /* save best */
388 J = (int)(ssj-ss); /* column */
392 } /* done with forward pass */
393 if (score <= 0) return 0;
395 /* to get the start point, go backwards */
397 /* 18-June-2003 fix bug in backtracking code to identify start of
398 alignment. Code used pam2[0][aa0[j]][aa1[i]] instead of
399 pam2p[0][j][aa1[i]]. Ideally, it would use waa_a.
403 for (ssj=ss+J; ssj>=ss; ssj--) ssj->H= ssj->E= -1;
405 for (i=I; i>=0; i--) {
407 p = (i == I) ? 0 : -1;
408 for (ssj=ss+J, j= J; ssj>=ss; ssj--,j--) {
410 ssj->E=max(ssj->E,ssj->H-q)-r;
411 h = max(max(ssj->E,f),p+f_str->pam2p[0][j][aa1[i]]);
418 if (cost >= score) goto found;
425 /* printf(" %d: L: %3d-%3d/%3d; K: %3d-%3d/%3d\n",score,L,J,n0,K,I,n1); */
427 /* in the f_str version, the *res array is already allocated at 4*n0/3 */
429 a_res->res = f_str->res;
431 a_res->max0 = J+1; a_res->min0 = L; a_res->max1 = I+1; a_res->min1 = K;
433 /* ALIGN(&aa1[K-1],&aa0[L-1],I-K+1,J-L+1,ppst->pam2[0],q,r,res,nres,f_str); */
435 /* this code no longer refers to aa0[], it used pam2p[0][L] instead */
436 ALIGN(&aa0[L-1],&aa1[K-1],J-L+1,I-K+1,f_str->pam2p[0],L,q,r,
437 a_res->res,&a_res->nres,f_str);
439 /* DISPLAY(&aa0[L-1],&aa1[K-1],J-L+1,I-K+1,res,L,K,ppst->sq); */
444 static int CHECK_SCORE(const unsigned char *A, const unsigned char *B, int M, int N,
445 int *S, int **W, int IW, int G, int H, int *nres);
447 #define gap(k) ((k) <= 0 ? 0 : g+h*(k)) /* k-symbol indel cost */
449 /* Append "Delete k" op */
452 *last = (*sapp)[-1] -= (k); \
454 *last = (*sapp)[0] = -(k); \
459 /* Append "Insert k" op */
462 *last = (*sapp)[-1] += (k); \
464 *last = (*sapp)[0] = (k); \
469 #define REP { *last = (*sapp)[0] = 0; (*sapp)++; } /* Append "Replace" op */
476 print_seq_prof(unsigned char *A, int M,
477 unsigned char *B, int N,
478 int **w, int iw, int dir) {
480 int i_max, j_max, i,j;
484 for (i=1; i<=min(60,M); i++) {
485 fprintf(stderr,"%c",aa[A[i]]);
487 fprintf(stderr, - %d\n,M);
489 for (i=0; i<min(60,M); i++) {
491 for (j=1; j<21; j++) {
492 if (w[iw+i][j]> i_max) {
497 fprintf(stderr,"%c",aa[j_max]);
500 for (i=1; i<=min(60,N); i++) {
501 fprintf(stderr,"%c",aa[B[i]]);
503 fprintf(stderr," -%c: %d,%d\n",c_dir[dir],M,N);
507 /* align(A,B,M,N,tb,te) returns the cost of an optimum conversion between
508 A[1..M] and B[1..N] that begins(ends) with a delete if tb(te) is zero
509 and appends such a conversion to the current script. */
512 align(const unsigned char *A, const unsigned char *B, int M, int N,
513 int tb, int te, int **w, int iw, int g, int h,
514 struct f_struct *f_str, int dir,
515 int **sapp, int *last)
517 int midi, midj, type; /* Midpoint, type, and cost */
522 register int c, e, d, s;
524 struct swstr *f_ss, *r_ss;
526 /* print_seq_prof(A,M,B,N,w,iw,dir); */
533 /* Boundary cases: M <= 1 or N == 0 */
546 if (tb < te) tb = te;
547 midc = (tb-h) - gap(N);
551 for (j = 1; j <= N; j++) {
552 c = -gap(j-1) + wa[B[j]] - gap(N-j);
553 if (c > midc) { midc = c; midj = j;}
560 if (midj > 1) { INS(midj-1)}
562 if (midj < N) { INS(N-midj)}
567 /* Divide: Find optimum midpoint (midi,midj) of cost midc */
569 midi = M/2; /* Forward phase: */
570 f_ss[0].H = 0; /* Compute H(M/2,k) & E(M/2,k) for all k */
572 for (j = 1; j <= N; j++)
573 { f_ss[j].H = t = t-h;
577 for (i = 1; i <= midi; i++)
579 f_ss[0].H = c = t = t-h;
583 for (j = 1; j <= N; j++)
584 { if ((c = c - m) > (e = e - h)) e = c;
585 if ((c = f_ss[j].H - m) > (d = f_ss[j].E - h)) d = c;
594 f_ss[0].E = f_ss[0].H;
596 r_ss[N].H = 0; /* Reverse phase: */
597 t = -g; /* Compute R(M/2,k) & S(M/2,k) for all k */
598 for (j = N-1; j >= 0; j--)
599 { r_ss[j].H = t = t-h;
603 for (i = M-1; i >= midi; i--)
605 r_ss[N].H = c = t = t-h;
607 /* wa = w[A[i+1]]; */
609 for (j = N-1; j >= 0; j--)
610 { if ((c = c - m) > (e = e - h)) e = c;
611 if ((c = r_ss[j].H - m) > (d = r_ss[j].E - h)) d = c;
620 r_ss[N].E = r_ss[N].H;
622 midc = f_ss[0].H+r_ss[0].H; /* Find optimal midpoint */
625 for (j = 0; j <= N; j++)
626 if ((c = f_ss[j].H + r_ss[j].H) >= midc)
627 if (c > midc || f_ss[j].H != f_ss[j].E && r_ss[j].H == r_ss[j].E)
631 for (j = N; j >= 0; j--)
632 if ((c = f_ss[j].E + r_ss[j].E + g) > midc)
639 /* Conquer: recursively around midpoint */
642 { c1 = align(A,B,midi,midj,tb,-g,w,iw,g,h,f_str,0, sapp, last);
643 c2 = align(A+midi,B+midj,M-midi,N-midj,-g,te,w,iw+midi,g,h,f_str,1,sapp,last);
646 { align(A,B,midi-1,midj,tb,0,w,iw,g,h,f_str,2,sapp, last);
648 align(A+midi+1,B+midj,M-midi-1,N-midj,0,te,w,iw+midi+1,g,h,f_str,3,sapp,last);
653 /* Interface and top level of comparator */
655 int ALIGN(const unsigned char *A, const unsigned char *B, int M, int N,
656 int **W, int IW, int G, int H, int *S, int *NC,
657 struct f_struct *f_str)
659 struct swstr *f_ss, *r_ss;
666 if ((f_ss = (struct swstr *) calloc (N+2, sizeof (struct swstr)))
668 fprintf (stderr, "cannot allocate f_ss array %3d\n", N+2);
674 if ((r_ss = (struct swstr *) calloc (N+2, sizeof (struct swstr)))
676 fprintf (stderr, "cannot allocate r_ss array %3d\n", N+2);
682 /* print_seq_prof(A,M,W,IW); */
683 c = align(A,B,M,N,-G,-G,W,IW,G,H,f_str,0,&sapp, &last); /* OK, do it */
685 ck = CHECK_SCORE(A,B,M,N,S,W,IW,G,H,NC);
686 if (c != ck) printf("Check_score error. %d != %d\n",c,ck);
689 free(r_ss); free(f_ss);
694 /* Alignment display routine */
696 static char ALINE[51], BLINE[51], CLINE[51];
698 void DISPLAY(unsigned char *A, unsigned char *B, int M, int N,
699 int *S, int AP, int BP, char *sq)
700 { register char *a, *b, *c;
701 register int i, j, op;
704 i = j = op = lines = 0;
710 while (i < M || j < N)
711 { if (op == 0 && *S == 0)
715 *c++ = (*a++ == *b++) ? '|' : ' ';
732 if (a >= ALINE+50 || i >= M && j >= N)
733 { *a = *b = *c = '\0';
734 printf("\n%5d ",50*lines++);
735 for (b = ALINE+10; b <= a; b += 10)
739 printf("\n%5d %s\n %s\n%5d %s\n",ap,ALINE,CLINE,bp,BLINE);
749 /* CHECK_SCORE - return the score of the alignment stored in S */
751 static int CHECK_SCORE(const unsigned char *A, const unsigned char *B,
752 int M, int N, int *S, int **w, int iw,
753 int g, int h, int *NC)
755 register int i, j, op, nc;
758 /* print_seq_prof(A,M,w,iw); */
760 score = i = j = op = nc = 0;
761 while (i < M || j < N) {
764 score = w[iw+i][B[++j]] + score;
769 score = score - (g+op*h);
773 score = score - (g-op*h);
783 pre_cons(const unsigned char *aa1, int n1, int frame, struct f_struct *f_str) {}
785 /* aln_func_vals - set up aln.qlfact, qlrev, llfact, llmult, frame, llrev */
786 /* call from calcons, calc_id, calc_code */
788 aln_func_vals(int frame, struct a_struct *aln) {
790 aln->llfact = aln->llmult = aln->qlfact = 1;
791 aln->qlrev = aln->llrev = 0;
795 /* 29-June-2003 this version has been modified to use pst.pam2p
796 instead of pam2 to indicate similarity */
800 int calcons(const unsigned char *aa0, int n0,
801 const unsigned char *aa1, int n1,
803 struct a_struct *aln,
804 struct a_res_str a_res,
806 char *seqc0, char *seqc1, char *seqca,
807 struct f_struct *f_str)
810 int op, lenc, nd, ns, itmp;
811 char *sp0, *sp1, *spa, *sq;
815 if (pst.ext_sq_set) { sq = pst.sqx;}
818 aln->amin0 = a_res.min0;
819 aln->amax0 = a_res.max0;
820 aln->amin1 = a_res.min1;
821 aln->amax1 = a_res.max1;
825 if (min(a_res.min0,a_res.min1)<aln->llen || aln->showall==1) /* will we show all the start ?*/
826 if (a_res.min0>=a_res.min1) { /* aa0 extends more to left */
828 if (aln->showall==1) mins=a_res.min0;
829 else mins = min(a_res.min0,aln->llcntx);
830 aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst);
831 aln->smin0 = a_res.min0-mins;
832 if ((mins-a_res.min1)>0) {
833 memset(seqc1,' ',mins-a_res.min1);
834 aancpy(seqc1+mins-a_res.min1,(char *)aa1,a_res.min1,pst);
838 aancpy(seqc1,(char *)aa1+a_res.min1-mins,mins,pst);
839 aln->smin1 = a_res.min1-mins;
844 if (aln->showall == 1) mins=a_res.min1;
845 else mins = min(a_res.min1,aln->llcntx);
846 aancpy(seqc1,(char *)(aa1+a_res.min1-mins),mins,pst);
847 aln->smin1 = a_res.min1-mins;
848 if ((mins-a_res.min0)>0) {
849 memset(seqc0,' ',mins-a_res.min0);
850 aancpy(seqc0+mins-a_res.min0,(char *)aa0,a_res.min0,pst);
854 aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst);
855 aln->smin0 = a_res.min0-mins;
859 mins= min(aln->llcntx,min(a_res.min0,a_res.min1));
861 aln->smin0=a_res.min0;
862 aln->smin1=a_res.min1;
863 aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst);
864 aancpy(seqc1,(char *)aa1+a_res.min1-mins,mins,pst);
867 aln->smin0 = a_res.min0;
868 aln->smin1 = a_res.min1;
872 /* now get the middle */
874 memset(seqca,M_BLANK,mins);
880 lenc = aln->nident = aln->nsim = aln->ngap_q = aln->ngap_l = aln->nfs =op = 0;
884 while (i0 < a_res.max0 || i1 < a_res.max1) {
885 if (op == 0 && *rp == 0) {
888 if ((itmp=f_str->pam2p[0][i0][aa1[i1]])<0) { *spa = M_NEG; }
889 else if (itmp == 0) { *spa = M_ZERO;}
891 if (*spa == M_POS || *spa==M_ZERO) aln->nsim++;
893 *sp0 = sq[aa0[i0++]];
894 *sp1 = sq[aa1[i1++]];
896 if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}
897 else if (pst.dnaseq==1 && ((*sp0 == 'T' && *sp1 == 'U') ||
898 (*sp0=='U' && *sp1=='T'))) {
899 aln->nident++; *spa=M_IDENT;
905 if (op==0) op = *rp++;
908 *sp1++ = sq[aa1[i1++]];
915 *sp0++ = sq[aa0[i0++]];
927 /* now we have the middle, get the right end */
930 /* how much extra to show at end ? */
931 if (!aln->llcntx_flg) {
932 ns = mins + lenc + aln->llen; /* show an extra line? */
933 ns -= (itmp = ns %aln->llen); /* itmp = left over on last line */
934 if (itmp>aln->llen/2) ns += aln->llen; /* more than 1/2 , use another*/
935 nd = ns - (mins+lenc); /* this much extra */
937 else nd = aln->llcntx;
939 if (nd > max(n0-a_res.max0,n1-a_res.max1))
940 nd = max(n0-a_res.max0,n1-a_res.max1);
942 if (aln->showall==1) {
943 nd = max(n0-a_res.max0,n1-a_res.max1); /* reset for showall=1 */
945 aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,n0-a_res.max0,pst);
946 aancpy(seqc1+mins+lenc,(char *)aa1+a_res.max1,n1-a_res.max1,pst);
947 /* fill with blanks - this is required to use one 'nc' */
948 memset(seqc0+mins+lenc+n0-a_res.max0,' ',nd-(n0-a_res.max0));
949 memset(seqc1+mins+lenc+n1-a_res.max1,' ',nd-(n1-a_res.max1));
952 if ((nd-(n0-a_res.max0))>0) {
953 aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,n0-a_res.max0,pst);
954 memset(seqc0+mins+lenc+n0-a_res.max0,' ',nd-(n0-a_res.max0));
956 else aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,nd,pst);
958 if ((nd-(n1-a_res.max1))>0) {
959 aancpy(seqc1+mins+lenc,(char *)aa1+a_res.max1,n1-a_res.max1,pst);
960 memset(seqc1+mins+lenc+n1-a_res.max1,' ',nd-(n1-a_res.max1));
962 else aancpy(seqc1+mins+lenc,(char *)aa1+a_res.max1,nd,pst);
972 int calcons_a(const unsigned char *aa0, unsigned char *aa0a, int n0,
973 const unsigned char *aa1, int n1,
975 struct a_struct *aln,
976 struct a_res_str a_res,
978 char *seqc0, char *seqc0a, char *seqc1, char *seqca,
979 char *ann_arr, struct f_struct *f_str)
982 int op, lenc, nd, ns, itmp;
983 char *sp0, *sp0a, *sp1, *spa, *sq;
987 if (pst.ext_sq_set) {sq = pst.sqx;}
990 aln->amin0 = a_res.min0;
991 aln->amax0 = a_res.max0;
992 aln->amin1 = a_res.min1;
993 aln->amax1 = a_res.max1;
995 /* first fill in the ends */
999 if (min(a_res.min0,a_res.min1)<aln->llen || aln->showall==1) /* will we show all the start ?*/
1000 if (a_res.min0>=a_res.min1) { /* aa0 extends more to left */
1002 if (aln->showall==1) mins=a_res.min0;
1003 else mins = min(a_res.min0,aln->llcntx);
1004 aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst);
1005 aln->smin0 = a_res.min0-mins;
1006 if ((mins-a_res.min1)>0) {
1007 memset(seqc1,' ',mins-a_res.min1);
1008 aancpy(seqc1+mins-a_res.min1,(char *)aa1,a_res.min1,pst);
1012 aancpy(seqc1,(char *)aa1+a_res.min1-mins,mins,pst);
1013 aln->smin1 = a_res.min1-mins;
1018 if (aln->showall == 1) mins=a_res.min1;
1019 else mins = min(a_res.min1,aln->llcntx);
1020 aancpy(seqc1,(char *)(aa1+a_res.min1-mins),mins,pst);
1021 aln->smin1 = a_res.min1-mins;
1022 if ((mins-a_res.min0)>0) {
1023 memset(seqc0,' ',mins-a_res.min0);
1024 aancpy(seqc0+mins-a_res.min0,(char *)aa0,a_res.min0,pst);
1028 aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst);
1029 aln->smin0 = a_res.min0-mins;
1033 mins= min(aln->llcntx,min(a_res.min0,a_res.min1));
1035 aln->smin0=a_res.min0;
1036 aln->smin1=a_res.min1;
1037 aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst);
1038 aancpy(seqc1,(char *)aa1+a_res.min1-mins,mins,pst);
1041 aln->smin0 = a_res.min0;
1042 aln->smin1 = a_res.min1;
1046 /* now get the middle */
1048 memset(seqca,M_BLANK,mins);
1049 memset(seqc0a,' ',mins);
1056 lenc = aln->nident = aln->nsim = aln->ngap_q = aln->ngap_l = aln->nfs =op = 0;
1060 while (i0 < a_res.max0 || i1 < a_res.max1) {
1061 if (op == 0 && *rp == 0) {
1064 if ((itmp=f_str->pam2p[0][i0][aa1[i1]])<0) { *spa = M_NEG; }
1065 else if (itmp == 0) { *spa = M_ZERO;}
1066 else {*spa = M_POS;}
1067 if (*spa == M_POS || *spa==M_ZERO) aln->nsim++;
1069 *sp0a++ = ann_arr[aa0a[i0]];
1070 *sp0 = sq[aa0[i0++]];
1071 *sp1 = sq[aa1[i1++]];
1073 if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}
1074 else if (pst.dnaseq==1 && ((*sp0 == 'T' && *sp1 == 'U') ||
1075 (*sp0=='U' && *sp1=='T'))) {
1076 aln->nident++; *spa=M_IDENT;
1079 sp0++; sp1++; spa++;
1082 if (op==0) op = *rp++;
1086 *sp1++ = sq[aa1[i1++]];
1093 *sp0a++ = ann_arr[aa0a[i0]];
1094 *sp0++ = sq[aa0[i0++]];
1106 /* now we have the middle, get the right end */
1109 /* how much extra to show at end ? */
1110 if (!aln->llcntx_flg) {
1111 ns = mins + lenc + aln->llen; /* show an extra line? */
1112 ns -= (itmp = ns %aln->llen); /* itmp = left over on last line */
1113 if (itmp>aln->llen/2) ns += aln->llen; /* more than 1/2 , use another*/
1114 nd = ns - (mins+lenc); /* this much extra */
1116 else nd = aln->llcntx;
1118 if (nd > max(n0-a_res.max0,n1-a_res.max1))
1119 nd = max(n0-a_res.max0,n1-a_res.max1);
1121 if (aln->showall==1) {
1122 nd = max(n0-a_res.max0,n1-a_res.max1); /* reset for showall=1 */
1124 aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,n0-a_res.max0,pst);
1125 aancpy(seqc1+mins+lenc,(char *)aa1+a_res.max1,n1-a_res.max1,pst);
1126 /* fill with blanks - this is required to use one 'nc' */
1127 memset(seqc0+mins+lenc+n0-a_res.max0,' ',nd-(n0-a_res.max0));
1128 memset(seqc1+mins+lenc+n1-a_res.max1,' ',nd-(n1-a_res.max1));
1131 if ((nd-(n0-a_res.max0))>0) {
1132 aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,n0-a_res.max0,pst);
1133 memset(seqc0+mins+lenc+n0-a_res.max0,' ',nd-(n0-a_res.max0));
1135 else aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,nd,pst);
1137 if ((nd-(n1-a_res.max1))>0) {
1138 aancpy(seqc1+mins+lenc,(char *)aa1+a_res.max1,n1-a_res.max1,pst);
1139 memset(seqc1+mins+lenc+n1-a_res.max1,' ',nd-(n1-a_res.max1));
1141 else aancpy(seqc1+mins+lenc,(char *)aa1+a_res.max1,nd,pst);
1148 return mins+lenc+nd;
1152 update_code(char *al_str, int al_str_max, int op, int op_cnt);
1154 /* build an array of match/ins/del - length strings */
1155 int calc_code(const unsigned char *aa0, const int n0,
1156 const unsigned char *aa1, const int n1,
1157 struct a_struct *aln,
1158 struct a_res_str a_res,
1160 char *al_str, int al_str_n, struct f_struct *f_str)
1163 int op, lenc, nd, ns, itmp;
1165 const unsigned char *aa1p;
1171 if (pst.ext_sq_set) {
1186 aln->amin0 = a_res.min0;
1187 aln->amax0 = a_res.max0;
1188 aln->amin1 = a_res.min1;
1189 aln->amax1 = a_res.max1;
1192 lenc = aln->nident = aln->nsim = aln->ngap_q = aln->ngap_l = aln->nfs = op = p_op = 0;
1199 while (i0 < a_res.max0 || i1 < a_res.max1) {
1200 if (op == 0 && *rp == 0) {
1202 if (pst.pam2[0][aa0[i0]][aa1p[i1]]>=0) { aln->nsim++;}
1204 sp0 = sq[aa0[i0++]];
1205 sp1 = sq[aa1p[i1++]];
1207 if (p_op == 0 || p_op==3) {
1208 if (sp0 != '*' && sp1 != '*') {
1210 update_code(al_str,al_str_n-strlen(al_str),p_op,op_cnt);
1211 op_cnt = 1; p_op = 0;
1216 update_code(al_str,al_str_n-strlen(al_str),p_op,op_cnt);
1217 op_cnt = 1; p_op = 3;
1221 update_code(al_str,al_str_n-strlen(al_str),p_op,op_cnt);
1222 op_cnt = 1; p_op = 0;
1228 if (toupper(sp0) == toupper(sp1)) aln->nident++;
1229 else if (pst.dnaseq==1) {
1230 if ((toupper(sp0) == 'T' && toupper(sp1) == 'U') ||
1231 (toupper(sp0)=='U' && toupper(sp1)=='T')) aln->nident++;
1232 else if (toupper(sp0) == 'N') aln->ngap_q++;
1233 else if (toupper(sp1) == 'N') aln->ngap_l++;
1237 if (op==0) op = *rp++;
1239 if (p_op == 1) { op_cnt++;}
1241 update_code(al_str,al_str_n - strlen(al_str),p_op,op_cnt);
1242 op_cnt = 1; p_op = 1;
1244 op--; lenc++; i1++; aln->ngap_q++;
1247 if (p_op == 2) { op_cnt++;}
1249 update_code(al_str,al_str_n - strlen(al_str),p_op,op_cnt);
1250 op_cnt = 1; p_op = 2;
1252 op++; lenc++; i0++; aln->ngap_l++;
1256 update_code(al_str,al_str_n - strlen(al_str),p_op,op_cnt);
1262 update_code(char *al_str, int al_str_max, int op, int op_cnt) {
1264 char op_char[5]={"=-+*"};
1267 sprintf(tmp_cnt,"%c%d",op_char[op],op_cnt);
1268 strncat(al_str,tmp_cnt,al_str_max);
1271 int calc_id(const unsigned char *aa0, const int n0,
1272 const unsigned char *aa1, const int n1,
1273 struct a_struct *aln,
1274 struct a_res_str a_res,
1276 struct f_struct *f_str)
1278 int i0, i1, nn1, n_id;
1279 int op, lenc, nd, ns, itmp;
1281 const unsigned char *aa1p;
1285 if (pst.ext_sq_set) {
1301 lenc = n_id = aln->nsim = aln->ngap_q = aln->ngap_l = aln->nfs = op = 0;
1305 while (i0 < a_res.max0 || i1 < a_res.max1) {
1306 if (op == 0 && *rp == 0) {
1309 if (pst.pam2[0][aa0[i0]][aa1p[i1]]>=0) { aln->nsim++;}
1311 sp0 = sq[aa0[i0++]];
1312 sp1 = sq[aa1p[i1++]];
1313 if (toupper(sp0) == toupper(sp1)) n_id++;
1314 else if (pst.dnaseq==1 &&
1315 ((sp0=='T' && sp1== 'U')||(sp0=='U' && sp1=='T'))) n_id++;
1318 if (op==0) op = *rp++;
1319 if (op>0) {op--; lenc++; i1++; aln->ngap_q++; }
1320 else {op++; lenc++; i0++; aln->ngap_l++; }
1330 update_params(struct qmng_str *qm_msg, struct pstruct *ppst)
1332 ppst->n0 = qm_msg->n0;