2 /* copyright (c) 1998, 1999 William R. Pearson and the U. of Virginia */
4 /* - dropffa.c,v 1.1.1.1 1999/10/22 20:55:59 wrp Exp */
6 /* this code implements the "fastf" algorithm, which is designed to
7 deconvolve mixtures of protein sequences derived from mixed-peptide
8 Edman sequencing. The expected input is:
10 >test | 40001 90043 | mgstm1
16 Where the ','s indicate the length/end of the sequencing cycle
17 data. Thus, in this example, the sequence is from a mixture of 4
18 peptides, M was found in the first position, G,I, and L(2) at the second,
19 C,D, L(2) at the third, etc.
21 Because the sequences are derived from mixtures, there need not be
22 any partial sequence "MGCEN", the actual deconvolved sequence might be
41 #define NMAP MAXHASH+1
42 #define NMAP_X 23 /* re-code NMAP for 'X' */
43 #define NMAP_Z 24 /* re-code NMAP for '*' */
50 #include "drop_func.h"
52 static char *verstr="4.21 May 2006 (ajm/wrp)";
54 int shscore(unsigned char *aa0, const int n0, int **pam2, int nsq);
55 void update_code(char *al_str, int al_str_max, int op, int op_cnt, int fnum);
56 extern void aancpy(char *to, char *from, int count, struct pstruct pst);
59 extern int aatran(const unsigned char *ntseq, unsigned char *aaseq,
60 const int maxs, const int frame);
63 struct hlstr { int next, pos;};
65 void savemax(struct dstruct *, struct f_struct *);
67 static int m0_spam(unsigned char *, const unsigned char *, int, struct savestr *,
68 int **, struct f_struct *);
69 static int m1_spam(unsigned char *, int,
70 const unsigned char *, int,
71 struct savestr *, int **, int, struct f_struct *);
73 int sconn(struct savestr **v, int nsave, int cgap,
74 struct f_struct *, struct rstruct *, struct pstruct *,
75 const unsigned char *aa0, int n0,
76 const unsigned char *aa1, int n1,
79 void kpsort(struct savestr **, int);
80 void kssort(struct savestr **, int);
81 void kpsort(struct savestr **, int);
84 sconn_a(unsigned char *, int, int, struct f_struct *,
87 /* initialize for fasta */
90 init_work (unsigned char *aa0, int n0,
92 struct f_struct **f_arg)
97 struct f_struct *f_str;
101 struct savestr *vmptr;
104 f_str = (struct f_struct *) calloc(1, sizeof(struct f_struct));
106 fprintf(stderr, "Couldn't calloc f_str\n");
112 /* fastf3 cannot work with lowercase symbols as low complexity;
113 thus, NMAP must be disabled; this depends on aascii['X'] */
114 if (ppst->hsq[NMAP_X] == NMAP ) {ppst->hsq[NMAP_X]=1;}
115 if (ppst->hsq[NMAP_Z] == NMAP ) {ppst->hsq[NMAP_Z]=1;}
117 /* this does not work for share ppst structs, as in threads */
118 /*else {fprintf(stderr," cannot find 'X'==NMAP\n");} */
120 for (i0 = 1, mhv = -1; i0 <= ppst->nsq; i0++)
121 if (ppst->hsq[i0] < NMAP && ppst->hsq[i0] > mhv) mhv = ppst->hsq[i0];
124 fprintf (stderr, " maximum hsq <=0 %d\n", mhv);
128 for (f_str->kshft = 0; mhv > 0; mhv /= 2)
132 hmax = hv = (1 << f_str->kshft);
133 f_str->hmask = (hmax >> f_str->kshft) - 1;
135 if ((f_str->aa0 = (unsigned char *) calloc(n0+1, sizeof(char))) == NULL) {
136 fprintf (stderr, " cannot allocate f_str->aa0 array; %d\n",n0+1);
139 for (i=0; i<n0; i++) f_str->aa0[i] = aa0[i];
142 if ((f_str->aa0t = (unsigned char *) calloc(n0+1, sizeof(char))) == NULL) {
143 fprintf (stderr, " cannot allocate f_str0->aa0t array; %d\n",n0+1);
148 if ((f_str->harr = (struct hlstr *) calloc (hmax, sizeof (struct hlstr))) == NULL) {
149 fprintf (stderr, " cannot allocate hash array; hmax: %d hmask: %d\n",
153 if ((f_str->pamh1 = (int *) calloc (ppst->nsq+1, sizeof (int))) == NULL) {
154 fprintf (stderr, " cannot allocate pamh1 array\n");
157 if ((f_str->pamh2 = (int *) calloc (hmax, sizeof (int))) == NULL) {
158 fprintf (stderr, " cannot allocate pamh2 array\n");
161 if ((f_str->link = (struct hlstr *) calloc (n0, sizeof (struct hlstr))) == NULL) {
162 fprintf (stderr, " cannot allocate hash link array");
166 for (i0 = 0; i0 < hmax; i0++) {
167 f_str->harr[i0].next = -1;
168 f_str->harr[i0].pos = -1;
171 for (i0 = 0; i0 < n0; i0++) {
172 f_str->link[i0].next = -1;
173 f_str->link[i0].pos = -1;
176 /* encode the aa0 array */
178 this code has been modified to allow for mixed peptide sequences
179 aa0[] = 5 8 9 3 4 NULL 5 12 3 7 2 NULL
180 the 'NULL' character resets the hash position counter, to indicate that
181 any of several residues can be in the same position.
182 We also need to keep track of the number of times this has happened, so that
183 we can redivide the sequence later
185 i0 counts through the sequence
186 ii0 counts through the hashed sequence
193 for (i0= ii0 = 0; i0 < n0; i0++, ii0++) {
194 /* reset the counter and start hashing again */
195 if (aa0[i0] == ESS || aa0[i0] == 0) {
196 aa0[i0] = 0; /* set ESS to 0 */
197 /* fprintf(stderr," converted ',' to 0\n");*/
198 i0++; /* skip over the blank */
200 if (f_str->nmoff < 0) f_str->nmoff = i0;
204 hv = ppst->hsq[aa0[i0]];
205 f_str->link[i0].next = f_str->harr[hv].next;
206 f_str->link[i0].pos = f_str->harr[hv].pos;
207 f_str->harr[hv].next = i0;
208 f_str->harr[hv].pos = ii0;
209 f_str->pamh2[hv] = ppst->pam2[0][aa0[i0]][aa0[i0]];
211 if (f_str-> nmoff < 0) f_str->nmoff = n0;
216 fprintf(stderr," nmoff: %d/%d nm0: %d\n", f_str->nmoff, n0,f_str->nm0);
222 fprintf(stderr," hmax: %d\n",hmax);
223 for ( hv=0; hv<hmax; hv++)
224 fprintf(stderr,"%2d %c %3d %3d\n",hv,
225 (hv > 0 && hv < ppst->nsq ) ? ppst->sq[ppst->hsq[hv]] : ' ',
226 f_str->harr[hv].pos,f_str->harr[hv].next);
227 fprintf(stderr,"----\n");
228 for ( hv=0; hv<n0; hv++)
229 fprintf(stderr,"%2d: %3d %3d\n",hv,
230 f_str->link[hv].pos,f_str->link[hv].next);
234 f_str->maxsav = MAXSAV;
235 if ((f_str->vmax = (struct savestr *)
236 calloc(MAXSAV,sizeof(struct savestr)))==NULL) {
237 fprintf(stderr, "Couldn't allocate vmax[%d].\n",f_str->maxsav);
241 if ((f_str->vptr = (struct savestr **)
242 calloc(MAXSAV,sizeof(struct savestr *)))==NULL) {
243 fprintf(stderr, "Couldn't allocate vptr[%d].\n",f_str->maxsav);
247 for (vmptr = f_str->vmax; vmptr < &f_str->vmax[MAXSAV]; vmptr++) {
248 vmptr->used = (int *) calloc(n0, sizeof(int));
249 if(vmptr->used == NULL) {
250 fprintf(stderr, "Couldn't alloc vmptr->used\n");
255 /* this has been modified from 0..<ppst->nsq to 1..<=ppst->nsq because the
256 pam2[0][0] is now undefined for consistency with blast
259 for (i0 = 1; i0 <= ppst->nsq; i0++)
260 f_str->pamh1[i0] = ppst->pam2[0][i0][i0];
262 ppst->param_u.fa.cgap = shscore(aa0,f_str->nmoff-1,ppst->pam2[0],ppst->nsq)/3;
263 if (ppst->param_u.fa.cgap > ppst->param_u.fa.bestmax/4)
264 ppst->param_u.fa.cgap = ppst->param_u.fa.bestmax/4;
268 if (f_str->diag==NULL)
269 f_str->diag = (struct dstruct *) calloc ((size_t)MAXDIAG,
270 sizeof (struct dstruct));
272 if (f_str->diag == NULL)
274 fprintf (stderr, " cannot allocate diagonal arrays: %ld\n",
275 (long) MAXDIAG * (long) (sizeof (struct dstruct)));
280 if ((f_str->aa1x =(unsigned char *)calloc((size_t)ppst->maxlen+2,
281 sizeof(unsigned char)))
283 fprintf (stderr, "cannot allocate aa1x array %d\n", ppst->maxlen+2);
289 /* allocate space for the scoring arrays */
292 maxn0 = max(3*n0/2,MIN_RES);
293 if ((res = (int *)calloc((size_t)maxn0,sizeof(int)))==NULL) {
294 fprintf(stderr,"cannot allocate alignment results array %d\n",maxn0);
298 f_str->max_res = maxn0;
300 /* Tatusov Statistics Setup */
302 /* initialize priors array. */
303 if((f_str->priors = (double *)calloc(ppst->nsq+1, sizeof(double))) == NULL) {
304 fprintf(stderr, "Couldn't allocate priors array.\n");
307 calc_priors(f_str->priors, ppst, f_str, NULL, 0, ppst->pseudocts);
310 f_str->shuff_cnt = ppst->shuff_node;
312 /* End of Tatusov Statistics Setup */
318 /* pstring1 is a message to the manager, currently 512 */
319 /* pstring2 is the same information, but in a markx==10 format */
321 get_param (struct pstruct *pstr, char *pstring1, char *pstring2)
324 char *pg_str="FASTF";
326 char *pg_str="TFASTF";
329 sprintf (pstring1, "%s (%s) function [%s matrix (%d:%d)] join: %d",pg_str,verstr,
330 pstr->pamfile, pstr->pam_h,pstr->pam_l,pstr->param_u.fa.cgap);
332 if (pstr->param_u.fa.iniflag) strcat(pstring1," init1");
334 if (pstr->zsflag==0) strcat(pstring1," not-scaled");
335 else if (pstr->zsflag==1) strcat(pstring1," reg.-scaled");
338 if (pstring2 != NULL) {
339 sprintf (pstring2, "; pg_name: %s\n; pg_ver: %s\n; pg_matrix: %s (%d:%d)\n\
341 pg_str,verstr, pstr->pamfile, pstr->pam_h,pstr->pam_l,
342 pstr->param_u.fa.cgap);
347 close_work (const unsigned char *aa0, const int n0,
348 struct pstruct *ppst,
349 struct f_struct **f_arg)
351 struct f_struct *f_str;
352 struct savestr *vmptr;
358 for (vmptr = f_str->vmax; vmptr < &f_str->vmax[MAXSAV]; vmptr++)
363 free(f_str->aa1x - 1); /* allocated, then aa1x++'ed */
378 int do_fastf (unsigned char *aa0, int n0,
379 const unsigned char *aa1, int n1,
380 struct pstruct *ppst, struct f_struct *f_str,
381 struct rstruct *rst, int *hoff, int opt_prob)
383 int nd; /* diagonal array size */
386 register struct dstruct *dptr;
388 register struct dstruct *diagp;
389 struct dstruct *dpmax;
392 struct savestr *vmptr;
395 int cmps (); /* comparison routine for ksort */
401 rst->score[0] = rst->score[1] = rst->score[2] = 0;
408 if (n0+n1+1 >= MAXDIAG) {
409 fprintf(stderr,"n0,n1 too large: %d, %d\n",n0,n1);
410 rst->score[0] = rst->score[1] = rst->score[2] = -1;
419 dpmax = &f_str->diag[nd];
420 for (dptr = &f_str->diag[f_str->ndo]; dptr < dpmax;) {
426 /* initialize the saved segment structures */
427 for (vmptr = f_str->vmax; vmptr < &f_str->vmax[MAXSAV]; vmptr++) {
429 memset(vmptr->used, 0, n0 * sizeof(int));
432 f_str->lowmax = f_str->vmax;
437 diagp = &f_str->diag[f_str->noff];
438 for (lhval = lpos = 0; lpos < n1; lpos++, diagp++) {
439 if (hsq[aa1[lpos]]>=NMAP) {
441 while (lpos < n1 && hsq[aa1[lpos]]>=NMAP) {lpos++; diagp++;}
442 if (lpos >= n1) break;
445 lhval = hsq[aa1[lpos]];
446 for (tpos = f_str->harr[lhval].pos, npos = f_str->harr[lhval].next;
447 tpos >= 0; tpos = f_str->link[npos].pos, npos = f_str->link[npos].next) {
448 /* tscor gets position of end of current lpos diag run */
449 if ((tscor = (dptr = &diagp[-tpos])->stop) >= 0) {
450 tscor++; /* move forward one */
451 if ((tscor -= lpos) <= 0) { /* check for size of gap to this hit - */
452 /* includes implicit -1 mismatch penalty */
453 scor = dptr->score; /* current score of this run */
454 if ((tscor += (kfact = f_str->pamh2[lhval])) < 0 &&
455 f_str->lowscor < scor) /* if updating tscor makes run worse, */
456 savemax (dptr, f_str); /* save it */
458 if ((tscor += scor) >= kfact) { /* add to current run if continuing */
459 /* is better than restart (kfact) */
464 dptr->score = kfact; /* starting over is better */
465 dptr->start = (dptr->stop = lpos);
468 else { /* continue current run */
469 dptr->score += f_str->pamh1[aa0[tpos]];
473 else { /* no diagonal run yet */
474 dptr->score = f_str->pamh2[lhval];
475 dptr->start = (dptr->stop = lpos);
480 for (dptr = f_str->diag; dptr < dpmax;) {
481 if (dptr->score > f_str->lowscor) savemax (dptr, f_str);
489 at this point all of the elements of aa1[lpos]
490 have been searched for elements of aa0[tpos]
491 with the results in diag[dpos]
494 /* set up pointers for sorting */
496 for (nsave = 0, vmptr = f_str->vmax; vmptr < &f_str->vmax[MAXSAV]; vmptr++) {
497 if (vmptr->score > 0) {
498 vmptr->score = m0_spam (aa0, aa1, n1, vmptr, ppst->pam2[0], f_str);
499 f_str->vptr[nsave++] = vmptr;
504 kssort (f_str->vptr, nsave);
509 for (ib=0; ib<nsave; ib++) {
510 fprintf(stderr,"0: %4d-%4d 1: %4d-%4d dp: %d score: %d\n",
511 f_str->noff+f_str->vptr[ib]->start-f_str->vptr[ib]->dp,
512 f_str->noff+f_str->vptr[ib]->stop-f_str->vptr[ib]->dp,
513 f_str->vptr[ib]->start,f_str->vptr[ib]->stop,
514 f_str->vptr[ib]->dp,f_str->vptr[ib]->score);
515 for (im=f_str->vptr[ib]->start; im<=f_str->vptr[ib]->stop; im++)
516 fprintf(stderr," %c:%c",ppst->sq[aa0[f_str->noff+im-f_str->vptr[ib]->dp]],
520 fprintf(stderr,"---\n");
522 /* now use m_spam to re-evaluate */
524 for (tpos = 0; tpos < n0; tpos++) {
525 fprintf(stderr,"%c:%2d ",ppst->sq[aa0[tpos]],aa0[tpos]);
526 if (tpos %10 == 9) fputc('\n',stderr);
533 for (ib=0; ib < nsave; ib++) {
534 if ((vmptr=f_str->vptr[ib])->score > 0) {
535 vmptr->score = m1_spam (aa0, n0, aa1, n1, vmptr,
536 ppst->pam2[0], ppst->pam_l, f_str);
539 /* reset aa0 - modified by m1_spam */
540 for (tpos = 0; tpos < n0; tpos++) {
541 if (aa0[tpos] >= 32) aa0[tpos] -= 32;
544 kssort(f_str->vptr,nsave);
546 for ( ; nsave > 0; nsave--)
547 if (f_str->vptr[nsave-1]->score >0) break;
551 rst->score[0] = rst->score[1] = rst->score[2] = 0;
556 else f_str->nsave = nsave;
561 fprintf(stderr,"n0: %d; n1: %d; noff: %d\n",n0,n1,f_str->noff);
562 for (ib=0; ib<nsave; ib++) {
563 fprintf(stderr,"0: %4d-%4d 1: %4d-%4d dp: %d score: %d\n",
564 f_str->noff+f_str->vptr[ib]->start-f_str->vptr[ib]->dp,
565 f_str->noff+f_str->vptr[ib]->stop-f_str->vptr[ib]->dp,
566 f_str->vptr[ib]->start,f_str->vptr[ib]->stop,
567 f_str->vptr[ib]->dp,f_str->vptr[ib]->score);
568 for (im=f_str->vptr[ib]->start; im<=f_str->vptr[ib]->stop; im++)
569 fprintf(stderr," %c:%c",ppst->sq[aa0[f_str->noff+im-f_str->vptr[ib]->dp]],
574 fprintf(stderr,"---\n");
578 scor = sconn (f_str->vptr, nsave, ppst->param_u.fa.cgap, f_str,
579 rst, ppst, aa0, n0, aa1, n1, opt_prob);
581 for (vmptr=f_str->vptr[0],ib=1; ib<nsave; ib++)
582 if (f_str->vptr[ib]->score > vmptr->score) vmptr=f_str->vptr[ib];
584 rst->score[1] = vmptr->score;
585 rst->score[0] = rst->score[2] = max (scor, vmptr->score);
590 void do_work (const unsigned char *aa0, int n0,
591 const unsigned char *aa1, int n1,
593 struct pstruct *ppst, struct f_struct *f_str,
594 int qr_flg, struct rstruct *rst)
599 if (qr_flg==1 && f_str->shuff_cnt <= 0) {
601 rst->score[0]=rst->score[1]=rst->score[2]= -1;
605 if (f_str->dotat || ppst->zsflag == 4 || ppst->zsflag == 14 ) opt_prob=1;
607 if (ppst->zsflag == 2 || ppst->zsflag == 12) opt_prob = 0;
615 rst->score[0] = rst->score[1] = rst->score[2] = -1;
621 n10=aatran(aa1,f_str->aa1x,n1,frame);
623 for (i=0; i<n10; i++)
624 if (f_str->aa1x[i]>ppst->nsq) {
626 "residue[%d/%d] %d range (%d)\n",i,n1,
627 f_str->aa1x[i],ppst->nsq);
632 do_fastf (f_str->aa0, n0, f_str->aa1x, n10, ppst, f_str, rst, &hoff, opt_prob);
634 do_fastf (f_str->aa0, n0, aa1, n1, ppst, f_str, rst, &hoff, opt_prob);
637 rst->comp = rst->H = -1.0;
641 void do_opt (const unsigned char *aa0, int n0,
642 const unsigned char *aa1, int n1,
644 struct pstruct *ppst,
645 struct f_struct *f_str,
648 int optflag, tscore, hoff, n10;
650 optflag = ppst->param_u.fa.optflag;
651 ppst->param_u.fa.optflag = 1;
654 n10=aatran(aa1,f_str->aa1x,n1,frame);
655 do_fastf (f_str->aa0, n0, f_str->aa1x, n10, ppst, f_str, rst, &hoff, 1);
657 do_fastf(f_str->aa0, n0, aa1, n1, ppst, f_str, rst, &hoff, 1);
659 ppst->param_u.fa.optflag = optflag;
663 savemax (dptr, f_str)
664 register struct dstruct *dptr;
665 struct f_struct *f_str;
668 register struct savestr *vmptr;
671 dpos = (int) (dptr - f_str->diag);
673 /* check to see if this is the continuation of a run that is already saved */
675 if ((vmptr = dptr->dmax) != NULL && vmptr->dp == dpos &&
676 vmptr->start == dptr->start)
678 vmptr->stop = dptr->stop;
679 if ((i = dptr->score) <= vmptr->score)
682 if (vmptr != f_str->lowmax)
687 i = f_str->lowmax->score = dptr->score;
688 f_str->lowmax->dp = dpos;
689 f_str->lowmax->start = dptr->start;
690 f_str->lowmax->stop = dptr->stop;
691 dptr->dmax = f_str->lowmax;
694 for (vmptr = f_str->vmax; vmptr < &f_str->vmax[MAXSAV]; vmptr++)
695 if (vmptr->score < i)
698 f_str->lowmax = vmptr;
703 /* this version of spam() is designed to work with a collection of
704 subfragments, selecting the best amino acid at each position so
705 that, from each subfragment, each position is only used once.
707 As a result, m_spam needs to know the number of fragments.
709 In addition, it now requires a global alignment to the fragment
710 and resets the start and stop positions
715 m1_spam (unsigned char *aa0, int n0,
716 const unsigned char *aa1, int n1,
717 struct savestr *dmax, int **pam2, int pam_l,
718 struct f_struct *f_str)
720 int tpos, lpos, im, ii, nm, ci;
724 int start, stop, score;
727 const unsigned char *aa1p;
729 lpos = dmax->start; /* position in library sequence */
730 tpos = lpos - dmax->dp + f_str->noff; /* position in query sequence */
731 /* force global alignment, reset start*/
733 lpos = dmax->start -= tpos;
738 lpos = dmax->start = 0;
741 dmax->stop = dmax->start + (f_str->nmoff -2 - tpos);
742 if (dmax->stop > n1) dmax->stop = n1;
745 if (dmax->start < 0) {
747 lpos = dmax->start=0;
757 tot = curv.score = maxv.score = 0;
758 for (; lpos <= dmax->stop; lpos++,aa0p++,aa1p++) {
761 for (im = 0, ii=0; im < nm; im++,ii+=f_str->nmoff) {
762 if (aa0p[ii] < 32 && (pv = pam2[aa0p[ii]][*aa1p]) > ctot) {
765 /* fprintf(stderr, "lpos: %d im: %d ii: %d ci: %d ctot: %d pi: %d pv: %d\n", lpos, im, ii, ci, ctot, aa0p[ii], pam2[aa0p[ii]][*aa1p]); */
769 if (ci >= 0 && aa0p[ci] < 32) {
771 /* fprintf(stderr, "used: lpos: %d ci: %d : %c\n", lpos, ci, sq[aa0p[ci]]); */
774 dmax->used[&aa0p[ci] - aa0] = 1;
780 int ma_spam (unsigned char *aa0, int n0, const unsigned char *aa1,
781 struct savestr *dmax, struct pstruct *ppst,
782 struct f_struct *f_str)
785 int tpos, lpos, im, ii, nm, ci, lp0;
788 int start, stop, score;
790 const unsigned char *aa1p;
791 unsigned char *aa0p, *aa0pt;
794 pam2 = ppst->pam2[0];
797 lpos = dmax->start; /* position in library sequence */
798 tpos = lpos - dmax->dp + f_str->noff; /* position in query sequence */
799 lp0 = lpos = dmax->start;
801 aa0p = &aa0[tpos]; /* real aa0 sequence */
803 /* the destination aa0 sequence (without nulls) */
804 aa0pt = &f_str->aa0t[f_str->aa0ix];
809 /* sometimes, tpos may be > 0, with lpos = 0 - fill with 'X' */
810 if (lpos == 0 && tpos > 0)
811 for (ii = 0; ii < tpos; ii++) *aa0pt++ = 31; /* filler character */
813 tot = curv.score = maxv.score = 0;
814 for (; lpos <= dmax->stop; lpos++) {
817 for (im = 0, ii=0; im < nm; im++,ii+=f_str->nmoff) {
818 if (aa0p[ii] < 32 && (pv = pam2[aa0p[ii]][*aa1p]) > ctot) {
825 if (ci >= n0) {fprintf(stderr," warning - ci off end %d/%d\n",ci,n0);}
836 dmax->dp -= f_str->aa0ix; /* shift ->dp for aa0t */
837 if ((ci=(int)(aa0pt-f_str->aa0t)) > n0) {
838 fprintf(stderr," warning - aapt off %d/%d end\n",ci,n0);
841 *aa0pt++ = 0; /* skip over NULL */
843 aa0pt = &f_str->aa0t[f_str->aa0ix];
847 for (im = 0; im < f_str->nmoff; im++)
848 fprintf(stderr,"%c:%c,",ppst->sq[aa0pt[im]],ppst->sq[aa1p[im]]);
849 fprintf(stderr,"- %3d (%3d:%3d)\n",dmax->score,f_str->aa0ix,lp0);
852 f_str->aa0ix += f_str->nmoff; /* update offset into aa0t */
855 fprintf(stderr," ma_spam returning: %d\n",tot);
861 m0_spam (unsigned char *aa0, const unsigned char *aa1, int n1,
862 struct savestr *dmax, int **pam2,
863 struct f_struct *f_str)
865 int tpos, lpos, lend, im, ii, nm;
868 int start, stop, score;
870 const unsigned char *aa0p, *aa1p;
872 lpos = dmax->start; /* position in library sequence */
873 tpos = lpos - dmax->dp + f_str->noff; /* position in query sequence */
875 if (lpos-tpos >= 0) {
876 lpos = dmax->start -= tpos; /* force global alignment, reset start*/
881 lpos = dmax->start = 0;
887 if (n1 - (lpos + f_str->nmoff-2) < 0 ) {
888 lend = dmax->stop = (lpos - tpos) + f_str->nmoff-2;
889 if (lend >= n1) lend = n1-1;
897 tot = curv.score = maxv.score = 0;
898 for (; lpos <= lend; lpos++) {
900 for (im = 0, ii=0; im < nm; im++,ii+=f_str->nmoff) {
901 if ((pv = pam2[aa0p[ii]][*aa1p]) > ctot) {
909 /* reset dmax if necessary */
914 /* sconn links up non-overlapping alignments and calculates the score */
916 int sconn (struct savestr **v, int n, int cgap, struct f_struct *f_str,
917 struct rstruct *rst, struct pstruct *ppst,
918 const unsigned char *aa0, int n0,
919 const unsigned char *aa1, int n1,
923 struct slink *start, *sl, *sj, *so, sarr[MAXSAV];
927 /* sarr[] saves each alignment score/position, and provides a link
928 back to the previous alignment that maximizes the score */
930 /* sort the score left to right in lib pos */
935 /* for the remaining runs, see if they fit */
936 for (i = 0, si = 0; i < n; i++) {
938 /* if the score is less than the gap penalty, it never helps */
939 if (!opt_prob && (v[i]->score < cgap) ){ continue; }
941 lstart = v[i]->start;
943 /* put the run in the group */
945 sarr[si].score = v[i]->score;
946 sarr[si].next = NULL;
947 sarr[si].prev = NULL;
952 calc_tatusov(NULL, &sarr[si], aa0, n0, aa1, n1,
953 ppst->pam2[0],ppst->nsq, f_str,
954 ppst->pseudocts, opt_prob,ppst->zsflag);
955 sarr[si].tat = sarr[si].newtat;
958 /* if it fits, then increase the score */
959 for (sl = start; sl != NULL; sl = sl->next) {
960 plstop = sl->vp->stop;
961 /* if end < start or start > end, add score */
962 if (plstop < lstart ) {
964 sarr[si].score = sl->score + v[i]->score;
967 fprintf(stderr,"sconn %d added %d/%d getting %d; si: %d, tat: %g\n",
968 i,v[i]->start, v[i]->score,sarr[si].score,si, 2.0);
973 calc_tatusov(sl, &sarr[si], aa0, n0, aa1, n1,
974 ppst->pam2[0], ppst->nsq, f_str,
975 ppst->pseudocts, opt_prob, ppst->zsflag);
976 /* if our tatprob gets worse when we add this, forget it */
977 if(tatprob > sarr[si].tatprob) {
978 free(sarr[si].newtat->probs); /* get rid of new tat struct */
979 free(sarr[si].newtat);
982 sarr[si].tatprob = tatprob;
983 free(sarr[si].tat->probs); /* get rid of old tat struct */
985 sarr[si].tat = sarr[si].newtat;
987 sarr[si].score = sl->score + v[i]->score;
989 fprintf(stderr,"sconn TAT %d added %d/%d getting %d; si: %d, tat: %g\n",
990 i,v[i]->start, v[i]->score,sarr[si].score,si, tatprob);
998 /* now recalculate where the score fits - resort the scores */
1002 if(!opt_prob) { /* sort by scores */
1003 for (sj = start, so = NULL; sj != NULL; sj = sj->next) {
1004 if (sarr[si].score > sj->score) { /* if new score > best score */
1005 sarr[si].next = sj; /* previous best linked to best */
1007 so->next = &sarr[si]; /* old best points to new best */
1012 so = sj; /* old-best saved in so */
1014 } else { /* sort by tatprobs */
1015 for (sj = start, so = NULL; sj != NULL; sj = sj->next) {
1016 if ( sarr[si].tatprob < sj->tatprob ||
1017 ((sarr[si].tatprob == sj->tatprob) && sarr[si].score > sj->score) ) {
1020 so->next = &sarr[si];
1033 for (i = 0 ; i < si ; i++) {
1034 free(sarr[i].tat->probs);
1039 if (start != NULL) {
1042 rst->escore = start->tatprob;
1046 rst->segnum = rst->seglen = 0;
1047 for(sj = start ; sj != NULL; sj = sj->prev) {
1049 rst->seglen += sj->vp->stop - sj->vp->start + 1;
1051 return (start->score);
1059 rst->segnum = rst->seglen = 0;
1065 kssort (struct savestr **v, int n)
1068 struct savestr *tmp;
1070 for (gap = n / 2; gap > 0; gap /= 2)
1071 for (i = gap; i < n; i++)
1072 for (j = i - gap; j >= 0; j -= gap)
1074 if (v[j]->score >= v[j + gap]->score)
1083 struct savestr *v[];
1087 struct savestr *tmp;
1089 for (gap = n / 2; gap > 0; gap /= 2)
1090 for (i = gap; i < n; i++)
1091 for (j = i - gap; j >= 0; j -= gap)
1093 if (v[j]->start <= v[j + gap]->start)
1101 /* sorts alignments from right to left (back to front) based on stop */
1105 struct savestr *v[];
1109 struct savestr *tmp;
1111 for (gap = n / 2; gap > 0; gap /= 2)
1112 for (i = gap; i < n; i++)
1113 for (j = i - gap; j >= 0; j -= gap)
1115 if (v[j]->stop > v[j + gap]->stop)
1123 int do_walign (const unsigned char *aa0, int n0,
1124 const unsigned char *aa1, int n1,
1126 struct pstruct *ppst,
1127 struct f_struct *f_str,
1128 struct a_res_str *a_res,
1134 unsigned char *aa0t;
1135 const unsigned char *aa1p;
1138 f_str->n10 = n10 = aatran(aa1,f_str->aa1x,n1,frame);
1145 do_fastf(f_str->aa0, n0, aa1p, n10, ppst, f_str, &rst, &hoff, 1);
1147 /* the alignment portion takes advantage of the information left
1148 over in f_str after do_fastf is done. in particular, it is
1149 easy to run a modified sconn() to produce the alignments.
1151 unfortunately, the alignment display routine wants to have
1152 things encoded as with bd_align and sw_align, so we need to do that.
1155 if ((aa0t = (unsigned char *)calloc(n0+1,sizeof(unsigned char)))==NULL) {
1156 fprintf(stderr," cannot allocate aa0t %d\n",n0+1);
1160 kssort (f_str->vptr, f_str->nsave);
1162 if (f_str->nsave > f_str->nm0) f_str->nsave = f_str->nm0;
1163 for (ib=0; ib < f_str->nm0; ib++) {
1164 if (f_str->vptr[ib]->score > 0) {
1165 f_str->vptr[ib]->score =
1166 ma_spam (f_str->aa0, n0, aa1p, f_str->vptr[ib], ppst, f_str);
1170 /* after ma_spam is over, we need to reset aa0 */
1171 for (ib = 0; ib < n0; ib++) {
1172 if (f_str->aa0[ib] >= 32) f_str->aa0[ib] -= 32;
1175 kssort(f_str->vptr,f_str->nsave);
1177 for ( ; f_str->nsave > 0; f_str->nsave--)
1178 if (f_str->vptr[f_str->nsave-1]->score >0) break;
1180 a_res->nres = sconn_a (aa0t,n0, ppst->param_u.fa.cgap, f_str,a_res);
1183 a_res->res = f_str->res;
1185 return rst.score[0];
1188 /* this version of sconn is modified to provide alignment information */
1190 int sconn_a (unsigned char *aa0, int n0, int cgap,
1191 struct f_struct *f_str,
1192 struct a_res_str *a_res)
1194 int i, si, cmpp (), n;
1195 unsigned char *aa0p;
1202 struct slink *snext;
1203 struct slink *aprev;
1204 } *start, *sl, *sj, *so, sarr[MAXSAV];
1206 int *res, nres, tres;
1208 /* sort the score left to right in lib pos */
1213 krsort (v, n); /* sort from left to right in library */
1217 /* for each alignment, see if it fits */
1219 for (i = 0, si = 0; i < n; i++) {
1221 /* if the score is less than the join threshold, skip it */
1222 if (v[i]->score < cgap) continue;
1224 lstop = v[i]->stop; /* have right-most lstart */
1226 /* put the alignment in the group */
1229 sarr[si].score = v[i]->score;
1230 sarr[si].snext = NULL;
1231 sarr[si].aprev = NULL;
1233 /* if it fits, then increase the score */
1234 /* start points to a sorted (by total score) list of candidate
1237 for (sl = start; sl != NULL; sl = sl->snext) {
1238 plstart = sl->vp->start;
1239 if (plstart > lstop ) {
1240 sarr[si].score = sl->score + v[i]->score;
1241 sarr[si].aprev = sl;
1242 break; /* quit as soon as the alignment has been added */
1246 /* now recalculate the list of best scores */
1248 start = &sarr[si]; /* put the first one in the list */
1250 for (sj = start, so = NULL; sj != NULL; sj = sj->snext) {
1251 if (sarr[si].score > sj->score) { /* new score better than old */
1252 sarr[si].snext = sj; /* snext best after new score */
1254 so->snext = &sarr[si]; /* prev_best->snext points to best */
1255 else start = &sarr[si]; /* start points to best */
1256 break; /* stop looking */
1258 so = sj; /* previous candidate best */
1260 si++; /* increment to snext alignment */
1263 /* we have the best set of alignments, write them to *res */
1264 if (start != NULL) {
1265 res = f_str->res; /* set a destination for the alignment ops */
1266 tres = nres = 0; /* alignment op length = 0 */
1267 aa0p = aa0; /* point into query (needed for calcons later) */
1268 a_res->min1 = start->vp->start; /* start in library */
1269 a_res->min0 = 0; /* start in query */
1270 for (sj = start; sj != NULL; sj = sj->aprev ) {
1271 doff = (int)(aa0p-aa0) - (sj->vp->start-sj->vp->dp+f_str->noff);
1273 fprintf(stderr,"doff: %3d\n",doff);
1275 for (dx=sj->vp->start,sx=sj->vp->start-sj->vp->dp+f_str->noff;
1276 dx <= sj->vp->stop; dx++) {
1277 *aa0p++ = f_str->aa0t[sx++]; /* copy residue into aa0 */
1278 tres++; /* bump alignment counter */
1279 res[nres++] = 0; /* put 0-op in res */
1282 if (sj->aprev != NULL) {
1283 if (sj->aprev->vp->start - sj->vp->stop - 1 > 0 )
1284 /* put an insert op into res to get to next aligned block */
1285 tres += res[nres++] = (sj->aprev->vp->start - sj->vp->stop - 1);
1288 fprintf(stderr,"t0: %3d, tx: %3d, l0: %3d, lx: %3d, dp: %3d noff: %3d, score: %3d\n",
1289 sj->vp->start - sj->vp->dp + f_str->noff,
1290 sj->vp->stop - sj->vp->dp + f_str->noff,
1291 sj->vp->start,sj->vp->stop,sj->vp->dp,
1292 f_str->noff,sj->vp->score);
1293 fprintf(stderr,"%3d - %3d: %3d\n",
1294 sj->vp->start,sj->vp->stop,sj->vp->score);
1296 a_res->max1 = sj->vp->stop;
1297 a_res->max0 = a_res->max1 - sj->vp->dp + f_str->noff;
1301 fprintf(stderr,"(%3d - %3d):(%3d - %3d)\n",
1302 a_res->min0,a_res->max0,a_res->min1,a_res->max1);
1305 /* now replace f_str->aa0t with aa0 */
1306 for (i=0; i<n0; i++) f_str->aa0t[i] = aa0[i];
1313 /* calculate the 100% identical score */
1315 shscore(unsigned char *aa0, int n0, int **pam2, int nsq)
1318 for (i=0,sum=0; i<n0; i++)
1319 if (aa0[i]!=0 && aa0[i]<=nsq) sum += pam2[aa0[i]][aa0[i]];
1324 pre_cons(const unsigned char *aa1, int n1, int frame, struct f_struct *f_str) {
1327 f_str->n10=aatran(aa1,f_str->aa1x,n1,frame);
1331 /* aln_func_vals - set up aln.qlfact, qlrev, llfact, llmult, frame, llrev */
1332 /* call from calcons, calc_id, calc_code */
1334 aln_func_vals(int frame, struct a_struct *aln) {
1339 aln->llfact = aln->llmult = 3;
1341 if (frame > 3) aln->llrev = 1;
1343 aln->llfact = aln->qlfact = aln->llmult = 1;
1344 aln->llrev = aln->qlrev = 0;
1351 int calcons(const unsigned char *aa0, int n0,
1352 const unsigned char *aa1, int n1,
1354 struct a_struct *aln,
1355 struct a_res_str a_res,
1357 char *seqc0, char *seqc1, char *seqca,
1358 struct f_struct *f_str)
1360 int i0, i1, nn1, n0t;
1361 int op, lenc, len_gap, nd, ns, itmp;
1362 const unsigned char *aa1p;
1363 char *sp0, *sp1, *sq, *spa;
1367 /* do not allow low complexity */
1378 /* first fill in the ends */
1379 /* a_res.min0--; a_res.min1--; */
1380 n0 -= (f_str->nm0-1);
1382 aln->amin0 = a_res.min0;
1383 aln->amin1 = a_res.min1;
1384 aln->amax0 = a_res.max0;
1385 aln->amax1 = a_res.max1;
1387 if (min(a_res.min0,a_res.min1)<aln->llen || aln->showall==1) {
1388 /* will we show all the start ?*/
1390 mins = min(a_res.min1,aln->llen/2);
1391 aancpy(seqc1,(char *)(aa1p+a_res.min1-mins),mins,pst);
1392 aln->smin1 = a_res.min1-mins;
1393 if ((mins-a_res.min0)>0) {
1394 memset(seqc0,' ',mins-a_res.min0);
1395 aancpy(seqc0+mins-a_res.min0,(char *)f_str->aa0t,a_res.min0,pst);
1399 aancpy(seqc0,(char *)f_str->aa0t+a_res.min0-mins,mins,pst);
1400 aln->smin0 = a_res.min0-mins;
1404 mins= min(aln->llen/2,min(a_res.min0,a_res.min1));
1406 aln->smin0=a_res.min0;
1407 aln->smin1=a_res.min1;
1408 aancpy(seqc0,(char *)f_str->aa0t+a_res.min0-mins,mins,pst);
1409 aancpy(seqc1,(char *)aa1p+a_res.min1-mins,mins,pst);
1412 memset(seqca,M_BLANK,mins);
1414 /* now get the middle */
1420 n0t = lenc = len_gap = aln->nident = aln->nsim = aln->ngap_q = aln->ngap_l = op = 0;
1424 while (i0 < a_res.max0 || i1 < a_res.max1) {
1425 if (op == 0 && *rp == 0) {
1428 if ((itmp=pst.pam2[0][f_str->aa0t[i0]][aa1p[i1]])<0) { *spa = M_NEG; }
1429 else if (itmp == 0) { *spa = M_ZERO;}
1430 else {*spa = M_POS;}
1431 if (*spa == M_ZERO || *spa == M_POS) { aln->nsim++;}
1433 *sp0 = sq[f_str->aa0t[i0++]];
1434 *sp1 = sq[aa1p[i1++]];
1437 if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}
1438 sp0++; sp1++; spa++;
1441 if (op==0) { op = *rp++;}
1444 *sp1++ = sq[aa1p[i1++]];
1451 *sp0++ = sq[f_str->aa0t[i0++]];
1465 /* now we have the middle, get the right end */
1466 /* ns is amount to be shown */
1467 /* nd is amount remaining to be shown */
1468 ns = mins + lenc + aln->llen;
1469 ns -= (itmp = ns %aln->llen);
1470 if (itmp>aln->llen/2) ns += aln->llen;
1471 nd = ns - (mins+lenc);
1472 if (nd > max(n0t-a_res.max0,nn1-a_res.max1)) nd = max(n0t-a_res.max0,nn1-a_res.max1);
1474 if (aln->showall==1) {
1475 nd = max(n0t-a_res.max0,nn1-a_res.max1); /* reset for showall=1 */
1477 /* there isn't any aa0 to get */
1478 memset(seqc0+mins+lenc,' ',n0t-a_res.max0);
1479 aancpy(seqc1+mins+lenc,(char *)aa1p+a_res.max1,nn1-a_res.max1,pst);
1480 /* fill with blanks - this is required to use one 'nc' */
1481 memset(seqc0+mins+lenc+n0t-a_res.max0,' ',nd-(n0t-a_res.max0));
1482 memset(seqc1+mins+lenc+nn1-a_res.max1,' ',nd-(nn1-a_res.max1));
1485 memset(seqc0+mins+lenc,' ',nd);
1486 if ((nd-(nn1-a_res.max1))>0) {
1487 aancpy(seqc1+mins+lenc,(char *)aa1p+a_res.max1,nn1-a_res.max1,pst);
1488 memset(seqc1+mins+lenc+nn1-a_res.max1,' ',nd-(nn1-a_res.max1));
1490 else aancpy(seqc1+mins+lenc,(char *)aa1p+a_res.max1,nd,pst);
1493 return mins+lenc+nd;
1496 int calcons_a(const unsigned char *aa0, unsigned char *aa0a, int n0,
1497 const unsigned char *aa1, int n1,
1499 struct a_struct *aln,
1500 struct a_res_str a_res,
1502 char *seqc0, char *seqc0a, char *seqc1, char *seqca,
1503 char *ann_arr, struct f_struct *f_str)
1505 int i0, i1, nn1, n0t;
1506 int op, lenc, len_gap, nd, ns, itmp;
1507 const unsigned char *aa1p;
1508 char *sp0, *sp0a, *sp1, *sq, *spa;
1512 /* do not allow low complexity */
1523 aln->amin0 = a_res.min0;
1524 aln->amin1 = a_res.min1;
1525 aln->amax0 = a_res.max0;
1526 aln->amax1 = a_res.max1;
1528 /* first fill in the ends */
1529 n0 -= (f_str->nm0-1);
1531 if (min(a_res.min0,a_res.min1)<aln->llen || aln->showall==1) {
1532 /* will we show all the start ?*/
1534 mins = min(a_res.min1,aln->llen/2);
1535 aancpy(seqc1,(char *)(aa1p+a_res.min1-mins),mins,pst);
1536 aln->smin1 = a_res.min1-mins;
1537 if ((mins-a_res.min0)>0) {
1538 memset(seqc0,' ',mins-a_res.min0);
1539 aancpy(seqc0+mins-a_res.min0,(char *)f_str->aa0t,a_res.min0,pst);
1543 aancpy(seqc0,(char *)f_str->aa0t+a_res.min0-mins,mins,pst);
1544 aln->smin0 = a_res.min0-mins;
1548 mins= min(aln->llen/2,min(a_res.min0,a_res.min1));
1550 aln->smin0=a_res.min0;
1551 aln->smin1=a_res.min1;
1552 aancpy(seqc0,(char *)f_str->aa0t+a_res.min0-mins,mins,pst);
1553 aancpy(seqc1,(char *)aa1p+a_res.min1-mins,mins,pst);
1556 memset(seqca,M_BLANK,mins);
1557 memset(seqc0a,' ',mins);
1559 /* now get the middle */
1566 n0t = lenc = len_gap = aln->nident = aln->nsim = aln->ngap_q = aln->ngap_l = op = 0;
1570 while (i0 < a_res.max0 || i1 < a_res.max1) {
1571 if (op == 0 && *rp == 0) {
1574 if ((itmp=pst.pam2[0][f_str->aa0t[i0]][aa1p[i1]])<0) { *spa = M_NEG; }
1575 else if (itmp == 0) { *spa = M_ZERO;}
1576 else {*spa = M_POS;}
1577 if (*spa == M_ZERO || *spa == M_POS) { aln->nsim++;}
1580 *sp0 = sq[f_str->aa0t[i0++]];
1581 *sp1 = sq[aa1p[i1++]];
1584 if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}
1585 sp0++; sp1++; spa++;
1588 if (op==0) { op = *rp++;}
1592 *sp1++ = sq[aa1p[i1++]];
1599 *sp0++ = sq[f_str->aa0t[i0++]];
1611 *sp0a = *spa = '\0';
1614 /* now we have the middle, get the right end */
1615 /* ns is amount to be shown */
1616 /* nd is amount remaining to be shown */
1617 ns = mins + lenc + aln->llen;
1618 ns -= (itmp = ns %aln->llen);
1619 if (itmp>aln->llen/2) ns += aln->llen;
1620 nd = ns - (mins+lenc);
1621 if (nd > max(n0t-a_res.max0,nn1-a_res.max1)) nd = max(n0t-a_res.max0,nn1-a_res.max1);
1623 if (aln->showall==1) {
1624 nd = max(n0t-a_res.max0,nn1-a_res.max1); /* reset for showall=1 */
1626 /* there isn't any aa0 to get */
1627 memset(seqc0+mins+lenc,' ',n0t-a_res.max0);
1628 aancpy(seqc1+mins+lenc,(char *)aa1p+a_res.max1,nn1-a_res.max1,pst);
1629 /* fill with blanks - this is required to use one 'nc' */
1630 memset(seqc0+mins+lenc+n0t-a_res.max0,' ',nd-(n0t-a_res.max0));
1631 memset(seqc1+mins+lenc+nn1-a_res.max1,' ',nd-(nn1-a_res.max1));
1634 memset(seqc0+mins+lenc,' ',nd);
1635 if ((nd-(nn1-a_res.max1))>0) {
1636 aancpy(seqc1+mins+lenc,(char *)aa1p+a_res.max1,nn1-a_res.max1,pst);
1637 memset(seqc1+mins+lenc+nn1-a_res.max1,' ',nd-(nn1-a_res.max1));
1639 else aancpy(seqc1+mins+lenc,(char *)aa1p+a_res.max1,nd,pst);
1642 return mins+lenc+nd;
1645 void aa0shuffle(unsigned char *aa0, int n0, struct f_struct *f_str) {
1650 for (i = f_str->nmoff-1 ; --i ; ) {
1652 /* j = nrand(i); if (i == j) continue;*/ /* shuffle columns */
1653 j = (f_str->nmoff - 2) - i; if (i <= j) break; /* reverse columns */
1655 /* swap all i'th column residues for all j'th column residues */
1656 for(k = 0 ; k < f_str->nm0 ; k++) {
1657 tmp = aa0[(k * (f_str->nmoff)) + i];
1658 aa0[(k * (f_str->nmoff)) + i] = aa0[(k * (f_str->nmoff)) + j];
1659 aa0[(k * (f_str->nmoff)) + j] = tmp;
1664 /* build an array of match/ins/del - length strings */
1665 int calc_code(const unsigned char *aa0, const int n0,
1666 const unsigned char *aa1, const int n1,
1667 struct a_struct *aln,
1668 struct a_res_str a_res,
1670 char *al_str, int al_str_n, struct f_struct *f_str)
1673 int op, lenc, len_gap;
1675 const unsigned char *aa1p;
1682 if (pst.ext_sq_set) {
1698 lenc = len_gap = aln->nident = aln->nsim = aln->ngap_q = aln->ngap_l = aln->nfs = op = p_op = 0;
1705 fnum = f_str->aa0ti[i0] + 1;
1706 while (i0 < a_res.max0 || i1 < a_res.max1) {
1707 if (op == 0 && *rp == 0) {
1708 if (p_op == 0) { op_cnt++;}
1710 update_code(al_str,al_str_n-strlen(al_str),p_op,op_cnt,fnum);
1711 op_cnt = 1; p_op = 0;
1712 fnum = f_str->aa0ti[i0] + 1;
1716 if (pst.pam2[0][f_str->aa0t[i0]][aa1p[i1]]>=0) {aln->nsim++;}
1717 sp0 = pst.sq[f_str->aa0t[i0++]];
1718 sp1 = pst.sq[aa1p[i1++]];
1719 if (toupper(sp0) == toupper(sp1)) aln->nident++;
1722 if (op==0) op = *rp++;
1724 if (p_op == 1) { op_cnt++;}
1726 update_code(al_str,al_str_n - strlen(al_str),p_op,op_cnt,fnum);
1727 op_cnt = 1; p_op = 1; fnum = f_str->aa0ti[i0] + 1;
1729 op--; lenc++; i1++; len_gap++;
1732 if (p_op == 2) { op_cnt++;}
1734 update_code(al_str,al_str_n - strlen(al_str),p_op,op_cnt,fnum);
1735 op_cnt = 1; p_op = 2; fnum = f_str->aa0ti[i0] + 1;
1737 op++; lenc++; i0++; len_gap++;
1741 update_code(al_str,al_str_n - strlen(al_str),p_op,op_cnt,fnum);
1743 return lenc - len_gap;
1747 update_code(char *al_str, int al_str_max, int op, int op_cnt, int fnum) {
1749 char op_char[4]={"=-+"};
1753 sprintf(tmp_cnt,"%c%d[%d]",op_char[op],op_cnt,fnum);
1755 sprintf(tmp_cnt,"%c%d",op_char[op],op_cnt);
1757 strncat(al_str,tmp_cnt,al_str_max);
1760 int calc_id(const unsigned char *aa0, int n0,
1761 const unsigned char *aa1, int n1,
1762 struct a_struct *aln,
1763 struct a_res_str a_res,
1765 struct f_struct *f_str)
1767 int i0, i1, nn1, n0t;
1768 int op, lenc, len_gap;
1769 const unsigned char *aa1p;
1775 if (pst.ext_sq_set) {
1790 /* first fill in the ends */
1791 /* a_res.min0--; a_res.min1--; */
1792 n0 -= (f_str->nm0-1);
1794 /* now get the middle */
1796 n0t = lenc = len_gap = aln->nident = aln->nsim = aln->ngap_q = aln->ngap_l = op = 0;
1800 while (i0 < a_res.max0 || i1 < a_res.max1) {
1801 if (op == 0 && *rp == 0) {
1803 if (pst.pam2[0][f_str->aa0t[i0]][aa1p[i1]]>=0) {aln->nsim++;}
1804 sp0 = sq[f_str->aa0t[i0++]];
1805 sp1 = sq[aa1p[i1++]];
1808 if (toupper(sp0) == toupper(sp1)) aln->nident++;
1811 if (op==0) { op = *rp++;}
1827 return lenc-len_gap;
1832 #include "structs.h"
1836 update_params(struct qmng_str *qm_msg,
1837 struct mngmsg *m_msg, struct pstruct *ppst)
1839 m_msg->n0 = ppst->n0 = qm_msg->n0;
1840 m_msg->nm0 = qm_msg->nm0;
1841 m_msg->escore_flg = qm_msg->escore_flg;
1842 m_msg->qshuffle = qm_msg->qshuffle;