1 /* copyright (c) 1998, 1999 William R. Pearson and the U. of Virginia */
3 /* $Name: fa_34_26_5 $ - $Id: dropfs2.c,v 1.40 2007/02/26 21:56:59 wrp Exp $ */
5 /* changed to return 2.0, rather than -1.0, for failure */
7 /* Feb 4, 2005 - modifications to allow searches with ktup=2 for very
8 long queries. This is a temporary solution to savemax(), spam()
9 which do not preserve exact matches
11 do_fasts() has been modified to allow higher maxsav for do_walign
12 than for do_work (2*nsegs, 6*nsegs)
15 /* this code implements the "fasts" algorithm, which compares a set of
16 protein fragments to a protein sequence. Comma's are used to separate
17 the sequence fragments, which need not be the same length.
19 The expected input is:
26 The fragments do not need to be in the correct order (which is
27 presumably unknown from the peptide sequencing.
42 #define NMAP_X 23 /* for 'X' */
43 #define NMAP_Z 24 /* for '*' */
45 #define NMAP MAXHASH+1
47 static char *verstr="4.32 Feb 2007";
50 #include "drop_func.h"
52 int shscore(const unsigned char *aa0, const int n0, int **pam2, int nsq);
53 static void update_code(char *al_str, int al_str_max, int op, int op_cnt, int fnum);
54 extern void aancpy(char *to, char *from, int count, struct pstruct pst);
57 extern int aatran(const unsigned char *ntseq, unsigned char *aaseq, const int maxs, const int frame);
60 void savemax(struct dstruct *, struct f_struct *, int maxsav, int exact,int t_end);
62 int spam(const unsigned char *, const unsigned char *, int, struct savestr *, int **, struct f_struct *);
63 int sconn(struct savestr **v,
68 const unsigned char *aa0, int n0,
69 const unsigned char *aa1, int n1,
72 void kpsort(struct savestr **, int);
73 void kssort(struct savestr **, int); /* sort by score */
74 int sconn_a(unsigned char *, int,
75 const unsigned char *, int,
79 void kpsort(struct savestr **, int);
81 /* initialize for fasta */
84 init_work (unsigned char *aa0, const int n0,
86 struct f_struct **f_arg
91 int i0, ib, hv, old_hv;
93 struct f_struct *f_str;
94 /* these used to be globals, but do not need to be */
98 int stmp; /* temporary score */
103 unsigned char *query;
104 int k, l, m, n, N, length, index;
108 f_str = (struct f_struct *)calloc(1,sizeof(struct f_struct));
110 ppst->param_u.fa.pgap = ppst->gdelval + ppst->ggapval;
111 ktup = ppst->param_u.fa.ktup;
113 ktup = ppst->param_u.fa.ktup = 2;
115 fact = ppst->param_u.fa.scfact;
117 /* fasts3 cannot work with lowercase symbols as low complexity;
118 thus, NMAP must be disabled; this depends on aascii['X'] */
119 if (ppst->hsq[NMAP_X] == NMAP ) {ppst->hsq[NMAP_X]=1;}
120 if (ppst->hsq[NMAP_Z] == NMAP ) {ppst->hsq[NMAP_Z]=1;}
121 /* this does not work in a threaded environment */
122 /* else {fprintf(stderr," cannot find 'X'==NMAP\n");} */
124 for (i0 = 1, mhv = -1; i0 <= ppst->nsq; i0++)
125 if (ppst->hsq[i0] < NMAP && ppst->hsq[i0] > mhv) mhv = ppst->hsq[i0];
128 fprintf (stderr, " maximum hsq <=0 %d\n", mhv);
132 for (f_str->kshft = 0; mhv > 0; mhv /= 2) f_str->kshft++;
137 for (i0 = 0; i0 < ktup; i0++) hv = hv << f_str->kshft;
139 f_str->hmask = (hmax >> f_str->kshft) - 1;
141 if ((f_str->aa0t = (unsigned char *) calloc(n0+1, sizeof(char))) == NULL) {
142 fprintf (stderr, " cannot allocate f_str0->aa0t array; %d\n",n0+1);
146 if ((f_str->aa0ti = (int *) calloc(n0+1, sizeof(int))) == NULL) {
147 fprintf (stderr, " cannot allocate f_str0->aa0ti array; %d\n",n0+1);
151 if ((f_str->aa0b = (int *) calloc(n0+1, sizeof(int))) == NULL) {
152 fprintf (stderr, " cannot allocate f_str0->aa0b array; %d\n",n0+1);
156 if ((f_str->aa0e = (int *) calloc(n0+1, sizeof(int))) == NULL) {
157 fprintf (stderr, " cannot allocate f_str0->aa0e array; %d\n",n0+1);
161 if ((f_str->aa0i = (int *) calloc(n0+1, sizeof(int))) == NULL) {
162 fprintf (stderr, " cannot allocate f_str0->aa0i array; %d\n",n0+1);
166 if ((f_str->aa0s = (int *) calloc(n0+1, sizeof(int))) == NULL) {
167 fprintf (stderr, " cannot allocate f_str0->aa0s array; %d\n",n0+1);
171 if ((f_str->aa0l = (int *) calloc(n0+1, sizeof(int))) == NULL) {
172 fprintf (stderr, " cannot allocate f_str0->aa0l array; %d\n",n0+1);
176 if ((f_str->harr = (int *) calloc (hmax, sizeof (int))) == NULL) {
177 fprintf (stderr, " cannot allocate hash array: hmax: %d hmask: %d\n",
181 if ((f_str->pamh1 = (int *) calloc (ppst->nsq+1, sizeof (int))) == NULL) {
182 fprintf (stderr, " cannot allocate pamh1 array\n");
185 if ((f_str->pamh2 = (int *) calloc (hmax, sizeof (int))) == NULL) {
186 fprintf (stderr, " cannot allocate pamh2 array\n");
190 if ((f_str->link = (int *) calloc (n0, sizeof (int))) == NULL) {
191 fprintf (stderr, " cannot allocate hash link array");
195 /* for FASTS/FASTM, we want to know when we get to the end of a peptide,
196 so we can ensure that we set the end and restart */
198 if ((f_str->l_end = (int *) calloc (n0, sizeof (int))) == NULL) {
199 fprintf (stderr, " cannot allocate link end array");
203 for (i0 = 0; i0 < hmax; i0++) f_str->harr[i0] = -1;
204 for (i0 = 0; i0 < n0; i0++) f_str->link[i0] = -1;
205 for (i0 = 0; i0 < n0; i0++) f_str->l_end[i0] = 0;
207 /* count the number of peptides */
209 for (i0 = 0; i0 < n0; i0++) {
210 if (aa0[i0] == ESS || aa0[i0] == 0) nsegs++;
213 /* allocate space for peptides offsets, nm_u */
214 if ((f_str->nmoff = (int *)calloc(nsegs+1, sizeof(int)))==NULL) {
215 fprintf(stderr, " cannot allocat nmoff array: %d\n", nsegs);
219 if ((f_str->nm_u = (int *)calloc(nsegs+1, sizeof(int)))==NULL) {
220 fprintf(stderr, " cannot allocat nm_u array: %d\n", nsegs);
228 /* encode the aa0 array */
230 hv = ppst->hsq[aa0[0]];
231 phv = ppst->pam2[0][aa0[0]][aa0[0]];
234 for (i0=kt1 ; i0 < n0; i0++) {
235 if (aa0[i0] == ESS || aa0[i0] == 0) {
236 /* fprintf(stderr," converted %d to 0\n",aa0[i0]); */
237 aa0[i0] = EOSEQ; /* set ESS to 0 */
238 f_str->nmoff[f_str->nm0++] = i0+1;
239 f_str->l_end[i0-1] = 1;
243 hv = ppst->hsq[aa0[i0]];
244 phv = ppst->pam2[0][aa0[i0]][aa0[i0]];
249 hv = ((hv & f_str->hmask) << f_str->kshft) + ppst->hsq[aa0[i0]];
250 f_str->link[i0] = f_str->harr[hv];
251 f_str->harr[hv] = i0;
252 f_str->pamh2[hv] = (phv += ppst->pam2[0][aa0[i0]][aa0[i0]]);
253 phv -= ppst->pam2[0][aa0[i0 - kt1]][aa0[i0 - kt1]];
255 f_str->l_end[n0-1] = 1;
257 f_str->nmoff[f_str->nm0] = n0+1;
261 fprintf(stderr, ">>%s\n",qtitle);
262 for (j=0; j<f_str->nm0; j++) {
263 for (i=f_str->nmoff[j]; i < f_str->nmoff[j+1]-1; i++) {
264 fprintf(stderr,"%c",ppst->sq[aa0[i]]);
266 fprintf(stderr," %d\n",aa0[i]);
269 for (j=1; j<=ppst->nsq; j++) {
270 fprintf(stderr, "%c %d\n", ppst->sq[j], f_str->harr[j]);
273 for (j=0; j<=n0; j++) {
274 fprintf(stderr, "%c %d\n", ppst->sq[aa0[j]], f_str->link[j]);
280 /* build an integer array of the max score that can be achieved
281 from that position - use in savemax to mark some segments as
284 /* setup aa0b[], aa0e[], which specify the begining and end of each
289 for (ib = i0 = 0; i0 < n0; i0++) {
290 f_str->aa0l[i0] = i0 - q;
291 if (aa0[i0]==EOSEQ) {
292 f_str->aa0b[i0] = -1;
293 f_str->aa0e[i0] = -1;
294 f_str->aa0i[i0] = -1;
295 f_str->aa0l[i0] = -1;
297 if (i0 > 0)f_str->aa0s[i0-1] = stmp;
302 stmp += ppst->pam2[0][aa0[i0]][aa0[i0]];
305 f_str->aa0b[i0] = f_str->nmoff[ib];
306 f_str->aa0e[i0] = f_str->nmoff[ib+1]-2;
307 f_str->aa0i[i0] = ib;
310 fprintf(stderr,"%2d %c: %2d %2d %2d\n",i0,ppst->sq[aa0[i0]],
311 f_str->aa0b[i0],f_str->aa0e[i0],f_str->aa0i[i0]);
314 f_str->aa0s[n0-1]=stmp; /* save last best possible score */
316 /* maxsav - maximum number of peptide alignments saved in search */
317 /* maxsav_w - maximum number of peptide alignments saved in
320 f_str->maxsav = max(MAXSAV,2*f_str->nm0);
321 f_str->maxsav_w = max(MAXSAV,6*f_str->nm0);
323 if ((f_str->vmax = (struct savestr *)
324 calloc(f_str->maxsav_w,sizeof(struct savestr)))==NULL) {
325 fprintf(stderr, "Couldn't allocate vmax[%d].\n",f_str->maxsav_w);
329 if ((f_str->vptr = (struct savestr **)
330 calloc(f_str->maxsav_w,sizeof(struct savestr *)))==NULL) {
331 fprintf(stderr, "Couldn't allocate vptr[%d].\n",f_str->maxsav_w);
335 if ((f_str->sarr = (struct slink *)
336 calloc(f_str->maxsav_w,sizeof(struct slink)))==NULL) {
337 fprintf(stderr, "Couldn't allocate sarr[%d].\n",f_str->maxsav_w);
341 /* Tatusov Statistics Setup */
343 /* initialize priors array. */
344 if((f_str->priors = (double *)calloc(ppst->nsq+1, sizeof(double))) == NULL) {
345 fprintf(stderr, "Couldn't allocate priors array.\n");
349 calc_priors(f_str->priors, ppst, f_str, NULL, 0, ppst->pseudocts);
351 /* pre-calculate the Tatusov probability array for each full segment */
353 if(ppst->zsflag >= 1 && ppst->zsflag <= 3 && f_str->nm0 <= 10) {
355 tat_size = (1<<f_str->nm0) -1;
357 f_str->tatprobs = (struct tat_str **) malloc((size_t)tat_size*sizeof(struct tat_str *));
358 if (f_str->tatprobs == NULL) {
359 fprintf (stderr, " cannot allocate tatprobs array: %ld\n",
360 tat_size * sizeof(struct tat_str *));
364 f_str->intprobs = (double **) malloc((size_t)tat_size * sizeof(double *));
365 if(f_str->intprobs == NULL) {
366 fprintf(stderr, "Couldn't allocate intprobs array.\n");
370 for(k = 0, l = f_str->nm0 ; k < l ; k++) {
371 query = &(aa0[f_str->nmoff[k]]);
372 length = f_str->nmoff[k+1] - f_str->nmoff[k] - 1;
374 /* this segment alone */
375 index = (1 << k) - 1;
376 generate_tatprobs(query, 0, length - 1, f_str->priors, ppst->pam2[0], ppst->nsq, &(f_str->tatprobs[index]), NULL);
378 /* integrate the probabilities */
379 N = f_str->tatprobs[index]->highscore - f_str->tatprobs[index]->lowscore;
380 tatprobptr = (double *) calloc(N+1, sizeof(double));
381 if(tatprobptr == NULL) {
382 fprintf(stderr, "Couldn't calloc tatprobptr.\n");
385 f_str->intprobs[index] = tatprobptr;
387 for (i = 0; i <= N ; i++ ) {
388 tatprobptr[i] = f_str->tatprobs[index]->probs[i];
389 for (j = i + 1 ; j <= N ; j++ ) {
390 tatprobptr[i] += f_str->tatprobs[index]->probs[j];
394 /* this segment built on top of all other subcombinations */
395 for(i = 0, j = (1 << k) - 1 ; i < j ; i++) {
396 index = (1 << k) + i;
397 generate_tatprobs(query, 0, length - 1, f_str->priors, ppst->pam2[0], ppst->nsq, &(f_str->tatprobs[index]), f_str->tatprobs[i]);
399 /* integrate the probabilities */
400 N = f_str->tatprobs[index]->highscore - f_str->tatprobs[index]->lowscore;
401 tatprobptr = (double *) calloc(N+1, sizeof(double));
402 if(tatprobptr == NULL) {
403 fprintf(stderr, "Couldn't calloc tatprobptr.\n");
406 f_str->intprobs[index] = tatprobptr;
408 for (m = 0; m <= N ; m++ ) {
409 tatprobptr[m] = f_str->tatprobs[index]->probs[m];
410 for (n = m + 1 ; n <= N ; n++ ) {
411 tatprobptr[m] += f_str->tatprobs[index]->probs[n];
418 f_str->shuff_cnt = ppst->shuff_node;
421 /* End of Tatusov Statistics Setup */
424 for (i0=1; i0<=ppst->nsq; i0++) {
425 fprintf(stderr," %c: %2d ",ppst->sq[i0],f_str->harr[i0]);
426 hv = f_str->harr[i0];
428 fprintf(stderr," %2d",f_str->link[hv]);
429 hv = f_str->link[hv];
431 fprintf(stderr,"\n");
435 /* this has been modified from 0..<ppst->nsq to 1..<=ppst->nsq because the
436 pam2[0][0] is now undefined for consistency with blast
438 for (i0 = 1; i0 <= ppst->nsq; i0++)
439 f_str->pamh1[i0] = ppst->pam2[0][i0][i0];
443 if (f_str->diag==NULL)
444 f_str->diag = (struct dstruct *) calloc ((size_t)MAXDIAG,
445 sizeof (struct dstruct));
446 if (f_str->diag == NULL) {
447 fprintf (stderr, " cannot allocate diagonal arrays: %ld\n",
448 (long) MAXDIAG * (long) (sizeof (struct dstruct)));
453 if ((f_str->aa1x =(unsigned char *)calloc((size_t)ppst->maxlen+2,
454 sizeof(unsigned char)))
456 fprintf (stderr, "cannot allocate aa1x array %d\n", ppst->maxlen+2);
462 maxn0 = max(3*n0/2,MIN_RES);
463 if ((res = (int *)calloc((size_t)maxn0,sizeof(int)))==NULL) {
464 fprintf(stderr,"cannot allocate alignment results array %d\n",maxn0);
468 f_str->max_res = maxn0;
474 /* pstring1 is a message to the manager, currently 512 */
475 /* pstring2 is the same information, but in a markx==10 format */
477 get_param (struct pstruct *pstr, char *pstring1, char *pstring2)
481 char *pg_str="FASTS";
483 char *pg_str="TFASTS";
489 char *pg_str="FASTM";
491 char *pg_str="TFASTM";
495 sprintf (pstring1, "%s (%s) function [%s matrix (%d:%d)] ktup=%d",pg_str,verstr,
496 pstr->pamfile, pstr->pam_h,pstr->pam_l, pstr->param_u.fa.ktup);
497 if (pstr->param_u.fa.iniflag) strcat(pstring1," init1");
499 if (pstr->zsflag==0) strcat(pstring1," not-scaled");
500 else if (pstr->zsflag==1) strcat(pstring1," reg.-scaled");
502 if (pstring2 != NULL) {
503 sprintf (pstring2, "; pg_name: %s\n; pg_ver: %s\n; pg_matrix: %s (%d:%d)\n\
504 ; pg_gap-pen: %d %d\n; pg_ktup: %d\n",
505 pg_str,verstr,pstr->pamfile, pstr->pam_h,pstr->pam_l, pstr->gdelval,
506 pstr->ggapval,pstr->param_u.fa.ktup);
511 close_work (const unsigned char *aa0, const int n0,
512 struct pstruct *ppst,
513 struct f_struct **f_arg)
515 struct f_struct *f_str;
524 free(f_str->aa1x - 1); /* because f_str->aa1x got ++'ed when allocated! */
544 for(i = 0, j = (1 << f_str->nm0) - 1 ; i < j ; i++) {
545 free(f_str->tatprobs[i]->probs);
546 free(f_str->tatprobs[i]);
547 free(f_str->intprobs[i]);
549 free(f_str->tatprobs);
550 free(f_str->intprobs);
559 void do_fasts (const unsigned char *aa0, const int n0,
560 const unsigned char *aa1, const int n1,
561 struct pstruct *ppst, struct f_struct *f_str,
562 struct rstruct *rst, int *hoff, int opt_prob,
565 int nd; /* diagonal array size */
568 register struct dstruct *dptr;
570 register struct dstruct *diagp;
571 struct dstruct *dpmax;
574 struct savestr *vmptr, *vmaxmax;
577 int cmps (); /* comparison routine for ksort */
582 vmaxmax = &f_str->vmax[maxsav];
584 ktup = ppst->param_u.fa.ktup;
587 rst->score[0] = rst->score[1] = rst->score[2] = 0;
594 if (n0+n1+1 >= MAXDIAG) {
595 fprintf(stderr,"n0,n1 too large: %d, %d\n",n0,n1);
596 rst->score[0] = rst->score[1] = rst->score[2] = -1;
605 dpmax = &f_str->diag[nd];
606 for (dptr = &f_str->diag[f_str->ndo]; dptr < dpmax;)
613 for (vmptr = f_str->vmax; vmptr < vmaxmax; vmptr++) {
617 f_str->lowmax = f_str->vmax;
621 diagp = &f_str->diag[f_str->noff];
622 for (lhval=lpos=0; lpos < n1; lpos++, diagp++) {
623 if (ppst->hsq[aa1[lpos]]>=NMAP) { /* skip residue */
625 while (lpos < n1 && ppst->hsq[aa1[lpos]]>=NMAP) {lpos++; diagp++;}
626 if (lpos >= n1) break;
630 lhval = ((lhval & f_str->hmask) << f_str->kshft) + ppst->hsq[aa1[lpos]];
632 for (tpos = f_str->harr[lhval]; tpos >= 0; tpos = f_str->link[tpos]) {
634 dptr = &diagp[-tpos];
636 if (f_str->l_end[tpos]) {
637 if (dptr->score + f_str->pamh1[aa0[tpos]] == f_str->aa0s[tpos]) {
639 dptr->score = f_str->aa0s[tpos];
640 savemax(dptr, f_str, maxsav, 1, tpos);
644 else if (dptr->score + f_str->pamh1[aa0[tpos]] > f_str->aa0s[tpos]) {
646 fprintf(stderr,"exact match score too high: %d:%d %d < %d + %d - %d:%d - %d > %d\n",
647 tpos, lpos, f_str->aa0s[tpos],dptr->score, f_str->pamh1[aa0[tpos]],
648 dptr->start, dptr->stop,
649 dptr->stop - dptr->start, f_str->aa0l[tpos]);
652 dptr->start = lpos - f_str->aa0l[tpos];
653 dptr->score = f_str->aa0s[tpos];
654 savemax(dptr, f_str, maxsav, 1, tpos);
658 else if ((tscor = dptr->stop) >= 0) {
659 tscor++; /* tscor is stop of current, increment it */
660 if ((tscor -= lpos) <= 0) { /* tscor, the end of the current
661 match, is before lpos, so there
662 is a mismatch - this is also the
665 scor = dptr->score; /* save the run score on the diag */
666 if ((tscor += (kfact = f_str->pamh2[lhval])) < 0
667 && f_str->lowscor < scor) {
668 /* if what we will get (tscor + kfact) is < 0 and the
669 score is better than the worst savemax() score, save
671 savemax (dptr, f_str, maxsav,0,-1);
674 /* if extending is better than starting over, extend */
675 if ((tscor += scor) >= kfact) {
678 if (f_str->l_end[tpos]) {
679 if (dptr->score == f_str->aa0s[tpos]) {
680 savemax(dptr, f_str, maxsav,1,tpos);
683 else if (dptr->score > f_str->lowscor)
684 savemax(dptr, f_str, maxsav,0,tpos);
687 else { /* otherwise, start new */
689 dptr->start = dptr->stop = lpos;
692 else { /* tscor is after lpos, so extend one residue */
693 dptr->score += f_str->pamh1[aa0[tpos]];
695 if (f_str->l_end[tpos]) {
696 if (dptr->score == f_str->aa0s[tpos]) {
697 savemax(dptr, f_str, maxsav,1,tpos);
700 else if (dptr->score > f_str->lowscor)
701 savemax(dptr, f_str, maxsav,0,tpos);
705 else { /* start new */
706 dptr->score = f_str->pamh2[lhval];
707 dptr->start = dptr->stop = lpos;
712 for (dptr = f_str->diag; dptr < dpmax;) {
713 if (dptr->score > f_str->lowscor) savemax (dptr, f_str, maxsav,0,-1);
721 at this point all of the elements of aa1[lpos]
722 have been searched for elements of aa0[tpos]
723 with the results in diag[dpos]
726 for (nsave=0, vmptr=f_str->vmax; vmptr< vmaxmax; vmptr++) {
727 if (vmptr->score > 0) {
730 fprintf(stderr,"%c 0: %4d-%4d 1: %4d-%4d dp: %d score: %d",
731 (vmptr->exact ? 'x' : ' '),
732 f_str->noff+vmptr->start-vmptr->dp,
733 f_str->noff+vmptr->stop-vmptr->dp,
734 vmptr->start,vmptr->stop,
735 vmptr->dp,vmptr->score);
737 vmptr->score = spam (aa0, aa1, n1, vmptr, ppst->pam2[0], f_str);
739 fprintf(stderr," sscore: %d %d-%d\n",vmptr->score,vmptr->start,vmptr->stop);
741 if (vmptr->score > 0) f_str->vptr[nsave++] = vmptr;
746 rst->score[0] = rst->score[1] = rst->score[2] = 0;
755 fprintf(stderr,"n0: %d; n1: %d; noff: %d\n",n0,n1,f_str->noff);
756 for (ib=0; ib<nsave; ib++) {
757 fprintf(stderr,"%c 0: %4d-%4d 1: %4d-%4d dp: %d score: %d\n",
758 f_str->vptr[ib]->exact ? 'x' : ' ',
759 f_str->noff+f_str->vptr[ib]->start-f_str->vptr[ib]->dp,
760 f_str->noff+f_str->vptr[ib]->stop-f_str->vptr[ib]->dp,
761 f_str->vptr[ib]->start,f_str->vptr[ib]->stop,
762 f_str->vptr[ib]->dp,f_str->vptr[ib]->score);
765 fprintf(stderr,"---\n");
767 kssort(f_str->vptr,nsave);
769 /* make certain each seg is used only once */
771 for (ib=0; ib<f_str->nm0; ib++) f_str->nm_u[ib]=0;
772 for (ib=0; ib < nsave; ib++) {
773 doffset = f_str->vptr[ib]->dp - f_str->noff;
774 tpos=f_str->aa0i[f_str->vptr[ib]->start - doffset];
775 if (f_str->nm_u[tpos] == 0) {
778 f_str->vptr[ib]->score = -1;
782 kssort(f_str->vptr,nsave);
783 for (ib = nsave-1; ib >= 0; ib--)
784 if (f_str->vptr[ib]->score > -1) break;
787 scor = sconn (f_str->vptr, nsave,
788 f_str, rst, ppst, aa0, n0, aa1, n1,
791 if (rst->escore < 0.0) rst->escore = 2.0;
792 kssort(f_str->vptr,nsave);
794 /* here we should use an nsave that is consistent with sconn and nm0 */
796 f_str->nsave = nsave;
797 if (nsave > f_str->nm0) f_str->nsave = f_str->nm0;
799 rst->score[1] = f_str->vptr[0]->score;
800 rst->score[0] = rst->score[2] = max(scor, f_str->vptr[0]->score);
804 void do_work (const unsigned char *aa0, const int n0,
805 const unsigned char *aa1, const int n1,
807 struct pstruct *ppst, struct f_struct *f_str,
808 int qr_flg, struct rstruct *rst)
813 if (qr_flg==1 && f_str->shuff_cnt <= 0) {
815 rst->score[0]=rst->score[1]=rst->score[2]= -1;
819 if (f_str->dotat || ppst->zsflag == 4 || ppst->zsflag == 14 ) opt_prob=1;
821 if (ppst->zsflag == 2 || ppst->zsflag == 12) opt_prob = 0;
828 if (n1 < ppst->param_u.fa.ktup) {
829 rst->score[0] = rst->score[1] = rst->score[2] = -1;
834 n10=aatran(aa1,f_str->aa1x,n1,frame);
836 for (i=0; i<n10; i++)
837 if (f_str->aa1x[i]>ppst->nsq) {
839 "residue[%d/%d] %d range (%d)\n",i,n1,
840 f_str->aa1x[i],ppst->nsq);
845 do_fasts (aa0, n0, f_str->aa1x, n10, ppst, f_str, rst, &hoff, opt_prob, f_str->maxsav);
847 do_fasts (aa0, n0, aa1, n1, ppst, f_str, rst, &hoff, opt_prob, f_str->maxsav);
850 rst->comp = rst->H = -1.0;
853 void do_opt (const unsigned char *aa0, const int n0,
854 const unsigned char *aa1, const int n1,
856 struct pstruct *ppst, struct f_struct *f_str,
859 int lag, tscore, hoff, n10;
862 n10=aatran(aa1,f_str->aa1x,n1,frame);
863 do_fasts (aa0, n0, f_str->aa1x, n10, ppst, f_str, rst, &hoff, 1, f_str->maxsav);
865 do_fasts(aa0,n0,aa1,n1,ppst,f_str,rst, &hoff, 1, f_str->maxsav);
870 /* modify savemax() so that full length 100% matches are marked
871 so that they cannot be removed - if we have a 100% match, mark "exact"
873 modify savemax() to split alignments that include a comma
876 /* savemax(dptr, f_str, maxsav) takes a current diagonal run (saved in dptr),
877 and places it in the set of runs to be saved (in f_str->vmax[])
881 savemax (struct dstruct *dptr, struct f_struct *f_str, int maxsav,
884 register int dpos; /* position along the diagonal, -n0 .. n1 */
886 register struct savestr *vmptr;
887 struct savestr *vmaxmax;
889 vmaxmax = &f_str->vmax[maxsav];
891 dpos = (int) (dptr - f_str->diag); /* current diagonal */
893 /* check to see if this is the continuation of a run that is already saved */
894 /* if we are at the end of the query, save it regardless */
896 /* if (t_end > 0 && t_end < dptr->stop - dptr->start) {return;} */
898 if ((vmptr = dptr->dmax) != NULL /* have an active run */
899 && vmptr->dp == dpos && /* on the correct diagonal */
900 vmptr->start == dptr->start) { /* and it starts at the same place */
901 vmptr->stop = dptr->stop; /* update the end of the match in vmax[] */
905 fprintf(stderr,"have cont exact match: %d - %d:%d %d:%d = %d\n",
906 dptr->score, dptr->start, dptr->stop,
907 vmptr->start, vmptr->stop, dptr->stop - dptr->start+1);
913 /* if the score is worse, don't update, return - if the score gets bad
914 enough, it will restart in the diagonal scan */
915 if ((i = dptr->score) <= vmptr->score) { return;}
917 /* score is better, update */
920 vmptr->exact = exact;
921 /* if the score is not the worst, return */
922 if (vmptr != f_str->lowmax) { return;}
924 else { /* not a continuation */
925 /* save in the lowest place */
927 fprintf(stderr," Replacing: %d - %d:%d => %d - %d:%d",
928 f_str->lowmax->score, f_str->lowmax->start, f_str->lowmax->stop,
929 dptr->score, dptr->start, dptr->stop);
932 vmptr = f_str->lowmax;
936 fprintf(stderr,"have new exact match: %d - %d:%d = %d\n",
937 dptr->score, dptr->start, dptr->stop, dptr->stop - dptr->start+1);
940 vmptr->exact = exact;
942 i = vmptr->score = dptr->score; /* 'i' is used as a bound */
944 vmptr->start = dptr->start;
945 vmptr->stop = dptr->stop;
949 /* rescan the list for the worst score */
950 for (vmptr = f_str->vmax; vmptr < &f_str->vmax[maxsav] ; vmptr++) {
951 if (vmptr->score < i && !vmptr->exact) {
953 f_str->lowmax = vmptr;
960 /* this version of spam scans the diagonal to find the best local score,
961 then resets the boundaries for a global alignment and re-scans */
963 /* NOOVERHANG allows one to score any overhanging alignment as zero.
964 Useful for SAGE alignments. Normally, one allows overhangs because
965 of the possibility of partial sequences.
971 May, 2005 - spam() has an intesting bug that occurs when two
972 peptides match in order, separated by one position (the comma). In
973 this case, spam() splits the match, and only returns the better of
974 the two matches. So, if spam splits an alignment at a comma, it
975 needs the ability to insert the missing match.
979 int spam (const unsigned char *aa0, const unsigned char *aa1,int n1,
980 struct savestr *dmax, int **pam2,
981 struct f_struct *f_str)
986 int start, stop, score;
988 register const unsigned char *aa0p, *aa1p;
990 doffset = dmax->dp - f_str->noff;
991 curv.start = dmax->start;
992 aa1p = &aa1[dmax->start];
993 aa0p = &aa0[dmax->start - doffset];
995 tot = curv.score = maxv.score = 0;
996 for (lpos = dmax->start; lpos <= dmax->stop; lpos++) {
997 tot += pam2[*aa0p++][*aa1p++];
998 if (tot > curv.score) {
999 curv.stop = lpos; /* here, curv.stop is actually curv.max */
1003 if (curv.score > maxv.score) {
1004 maxv.start = curv.start;
1005 maxv.stop = curv.stop;
1006 maxv.score = curv.score;
1008 tot = curv.score = 0;
1009 curv.start = lpos+1;
1013 if (curv.score > maxv.score) {
1014 maxv.start = curv.start;
1015 maxv.stop = curv.stop;
1016 maxv.score = curv.score;
1019 if (maxv.score <= 0) return 0;
1021 /* now, reset the boundaries of the alignment using aa0b[]
1022 and aa0e[], which specify the residues that start and end
1025 maxv.start = f_str->aa0b[maxv.stop-doffset] + doffset;
1026 if (maxv.start < 0) {
1033 maxv.stop = f_str->aa0e[maxv.stop-doffset] + doffset;
1034 if (maxv.stop > n1) {
1040 aa1p = &aa1[lpos = maxv.start];
1041 aa0p = &aa0[lpos - doffset];
1043 for (tot=0; lpos <= maxv.stop; lpos++) {
1044 tot += pam2[*aa0p++][*aa1p++];
1049 /* if (maxv.start != dmax->start || maxv.stop != dmax->stop)
1050 printf(" new region: %3d %3d %3d %3d\n",maxv.start,
1051 dmax->start,maxv.stop,dmax->stop);
1053 dmax->start = maxv.start;
1054 dmax->stop = maxv.stop;
1059 int sconn (struct savestr **v, int n,
1060 struct f_struct *f_str,
1061 struct rstruct *rst, struct pstruct *ppst,
1062 const unsigned char *aa0, int n0,
1063 const unsigned char *aa1, int n1, int opt_prob)
1066 struct slink *start, *sl, *sj, *so, *sarr;
1067 int lstart, ltmp, tstart, plstop, ptstop, ptstart, tstop;
1073 /* sort the score left to right in lib pos */
1080 /* for the remaining runs, see if they fit */
1081 /* lstart/lstop -> start/stop in library sequence
1082 tstart/tstop -> start/stop in query sequence
1086 for (i = 0, si = 0; i < n; i++) {
1088 /* the segment is worth adding; find out where? */
1089 lstart = v[i]->start;
1091 tstart = lstart - v[i]->dp + f_str->noff;
1092 tstop = ltmp - v[i]->dp + f_str->noff;
1094 /* put the run in the group */
1096 sarr[si].score = v[i]->score;
1097 sarr[si].next = NULL;
1098 sarr[si].prev = NULL;
1099 sarr[si].tat = NULL;
1102 opt_prob for FASTS only has to do with using aa1 for priors,
1103 i.e. we always calculate tatprobs for segments in FASTS (unlike
1108 calc_tatusov(NULL, &sarr[si], aa0, n0, aa1, n1,
1109 ppst->pam2[0], ppst->nsq, f_str,
1110 ppst->pseudocts, opt_prob, ppst->zsflag);
1111 if (sarr[si].tatprob < 0.0) {
1112 fprintf(stderr," negative tatprob: %lg\n",sarr[si].tatprob);
1113 sarr[si].tatprob = 1.0;
1115 sarr[si].tat = sarr[si].newtat;
1118 /* if it fits, then increase the score
1120 start points to the highest scoring run
1121 -> next is the second highest, etc.
1122 put the segment into the highest scoring run that it fits into
1124 for (sl = start; sl != NULL; sl = sl->next) {
1125 ltmp = sl->vp->start;
1126 /* plstop -> previous lstop */
1127 plstop = sl->vp->stop;
1128 /* ptstart -> previous t(query) start */
1129 ptstart = ltmp - sl->vp->dp + f_str->noff;
1130 /* ptstop -> previous t(query) stop */
1131 ptstop = plstop - sl->vp->dp + f_str->noff;
1133 /* if the previous library stop is before the current library start */
1134 if (plstop < lstart && ( ptstop < tstart || ptstart > tstop))
1136 /* if the previous library stop is before the current library start */
1137 if (plstop < lstart && ptstop < tstart)
1141 sarr[si].score = sl->score + v[i]->score;
1145 tatprob = calc_tatusov(sl, &sarr[si], aa0, n0, aa1, n1,
1146 ppst->pam2[0], ppst->nsq, f_str,
1147 ppst->pseudocts, opt_prob, ppst->zsflag);
1148 /* if our tatprob gets worse when we add this, forget it */
1149 if(tatprob > sarr[si].tatprob) {
1150 free(sarr[si].newtat->probs); /* get rid of new tat struct */
1151 free(sarr[si].newtat);
1152 continue; /* reuse this sarr[si] */
1154 sarr[si].tatprob = tatprob;
1155 free(sarr[si].tat->probs); /* get rid of old tat struct */
1157 sarr[si].tat = sarr[si].newtat;
1159 sarr[si].score = sl->score + v[i]->score;
1161 fprintf(stderr,"sconn %d added %d/%d getting %d; si: %d, tat: %g\n",
1162 i,v[i]->start, v[i]->score,sarr[si].score,si, tatprob);
1170 /* now recalculate where the score fits */
1171 if (start == NULL) start = &sarr[si];
1174 for (sj = start, so = NULL; sj != NULL; sj = sj->next) {
1175 if (sarr[si].score > sj->score) {
1178 so->next = &sarr[si];
1186 for (sj = start, so = NULL; sj != NULL; sj = sj->next) {
1187 if ( sarr[si].tatprob < sj->tatprob ||
1188 ((sarr[si].tatprob == sj->tatprob) && sarr[si].score > sj->score) ) {
1191 so->next = &sarr[si];
1205 for (i = 0 ; i < si ; i++) {
1206 free(sarr[i].tat->probs);
1211 if (start != NULL) {
1213 rst->escore = start->tatprob;
1218 rst->segnum = rst->seglen = 0;
1219 for(sj = start ; sj != NULL; sj = sj->prev) {
1221 rst->seglen += sj->vp->stop - sj->vp->start + 1;
1223 return (start->score);
1228 rst->segnum = rst->seglen = 0;
1234 struct savestr *v[];
1238 struct savestr *tmp;
1240 for (gap = n / 2; gap > 0; gap /= 2)
1241 for (i = gap; i < n; i++)
1242 for (j = i - gap; j >= 0; j -= gap)
1244 if (v[j]->score >= v[j + gap]->score)
1254 struct savestr *v[];
1258 struct savestr *tmp;
1260 for (gap = n / 2; gap > 0; gap /= 2)
1261 for (i = gap; i < n; i++)
1262 for (j = i - gap; j >= 0; j -= gap)
1264 if (v[j]->start <= v[j + gap]->start)
1272 /* calculate the 100% identical score */
1274 shscore(const unsigned char *aa0, const int n0, int **pam2, int nsq)
1277 for (i=0,sum=0; i<n0; i++)
1278 if (aa0[i] != EOSEQ && aa0[i]<=nsq) sum += pam2[aa0[i]][aa0[i]];
1282 /* sorts alignments from right to left (back to front) based on stop */
1286 struct savestr *v[];
1290 struct savestr *tmp;
1292 for (gap = n / 2; gap > 0; gap /= 2)
1293 for (i = gap; i < n; i++)
1294 for (j = i - gap; j >= 0; j -= gap)
1296 if (v[j]->stop > v[j + gap]->stop)
1304 int do_walign (const unsigned char *aa0, int n0,
1305 const unsigned char *aa1, int n1,
1307 struct pstruct *ppst,
1308 struct f_struct *f_str,
1309 struct a_res_str *a_res,
1315 unsigned char *aa0t;
1316 const unsigned char *aa1p;
1317 struct savestr *vmptr;
1320 f_str->n10 = n10 = aatran(aa1,f_str->aa1x,n1,frame);
1327 do_fasts(aa0, n0, aa1p, n10, ppst, f_str, &rst, &hoff, 1, f_str->maxsav_w);
1329 /* the alignment portion takes advantage of the information left
1330 over in f_str after do_fasts is done. in particular, it is
1331 easy to run a modified sconn() to produce the alignments.
1333 unfortunately, the alignment display routine wants to have
1334 things encoded as with bd_align and sw_align, so we need to do that.
1337 /* unnecessary; do_fasts just did this */
1338 /* kssort(f_str->vptr,f_str->nsave); */
1340 /* at some point, we want one best score for each of the segments */
1342 for ( ; f_str->nsave > 0; f_str->nsave--)
1343 if (f_str->vptr[f_str->nsave-1]->score >0) break;
1345 if ((aa0t = (unsigned char *)calloc(n0+1,sizeof(unsigned char)))==NULL) {
1346 fprintf(stderr," cannot allocate aa0t %d\n",n0+1);
1350 /* copy aa0[] into f_str->aa0t[] */
1351 for (i=0; i<n0; i++) f_str->aa0t[i] = aa0t[i] = aa0[i];
1352 f_str->aa0t[i] = aa0t[i] = '\0';
1354 a_res->nres = sconn_a (aa0t,n0,aa1p,n10,f_str, a_res, ppst);
1358 a_res->res = f_str->res;
1360 return rst.score[0];
1363 /* this version of sconn is modified to provide alignment information */
1364 /* in addition, it needs to know whether a segment has been used before */
1366 /* sconn_a fills in the res[nres] array, but this is passed implicitly
1367 through f_str->res[f_str->nres] */
1369 int sconn_a (unsigned char *aa0, int n0,
1370 const unsigned char *aa1, int n1,
1371 struct f_struct *f_str,
1372 struct a_res_str *a_res,
1373 struct pstruct *ppst)
1375 int i, si, cmpp (), n;
1376 unsigned char *aa0p;
1377 int sx, dx, doff, *aa0tip;
1380 struct slink *start, *sl, *sj, *so, *sarr;
1381 int lstart, lstop, ltmp, plstart, tstart, plstop, ptstop, ptstart, tstop;
1383 int *res, nres, tres;
1387 /* sort the score left to right in lib pos */
1393 /* set things up in case nothing fits */
1394 if (n <=0 || v[0]->score <= 0) return 0;
1396 if (v[0]->score < 0) {
1398 sarr[0].score = v[0]->score;
1399 sarr[0].next = NULL;
1400 sarr[0].prev = NULL;
1405 krsort (v, n); /* sort from left to right in library */
1409 /* for each alignment, see if it fits */
1412 for (i = 0, si = 0; i < n; i++) {
1413 /* if the score is less than the join threshold, skip it */
1415 if (v[i]->score < 0) continue;
1417 lstart = v[i]->start;
1419 tstart = lstart - v[i]->dp + f_str->noff;
1420 tstop = lstop - v[i]->dp + f_str->noff;
1422 /* put the alignment in the group */
1425 sarr[si].score = v[i]->score;
1426 sarr[si].next = NULL;
1427 sarr[si].prev = NULL;
1428 sarr[si].tat = NULL;
1431 calc_tatusov(NULL, &sarr[si], aa0, n0, aa1, n1,
1432 ppst->pam2[0], ppst->nsq, f_str,
1433 ppst->pseudocts, 1, ppst->zsflag);
1434 sarr[si].tat = sarr[si].newtat;
1437 /* if it fits, then increase the score */
1438 /* start points to a sorted (by total score) list of candidate
1441 for (sl = start; sl != NULL; sl = sl->next) {
1442 plstart = sl->vp->start;
1443 plstop = sl->vp->stop;
1444 ptstart = plstart - sl->vp->dp + f_str->noff;
1445 ptstop = plstop - sl->vp->dp + f_str->noff;
1447 if (plstart > lstop && (ptstop < tstart || ptstart > tstop)) {
1449 if (plstop > lstart && ptstart > tstop) {
1451 /* alignment always uses probabilistic scoring ... */
1452 /* sarr[si].score = sl->score + v[i]->score;
1454 break; */ /* quit as soon as the alignment has been added */
1456 tatprob = calc_tatusov(sl, &sarr[si], aa0, n0, aa1, n1,
1457 ppst->pam2[0], ppst->nsq, f_str,
1458 ppst->pseudocts, 1, ppst->zsflag);
1459 /* if our tatprob gets worse when we add this, forget it */
1460 if(tatprob > sarr[si].tatprob) {
1461 free(sarr[si].newtat->probs); /* get rid of new tat struct */
1462 free(sarr[si].newtat);
1463 continue; /* reuse this sarr[si] */
1465 sarr[si].tatprob = tatprob;
1466 free(sarr[si].tat->probs); /* get rid of old tat struct */
1468 sarr[si].tat = sarr[si].newtat;
1470 sarr[si].score = sl->score + v[i]->score;
1472 fprintf(stderr,"sconn %d added %d/%d getting %d; si: %d, tat: %g\n",
1473 i,v[i]->start, v[i]->score,sarr[si].score,si, tatprob);
1480 /* now recalculate the list of best scores */
1482 start = &sarr[si]; /* put the first one in the list */
1484 for (sj = start, so = NULL; sj != NULL; sj = sj->next) {
1485 /* if (sarr[si].score > sj->score) { */ /* new score better than old */
1486 if ( sarr[si].tatprob < sj->tatprob ||
1487 ((sarr[si].tatprob == sj->tatprob) && sarr[si].score > sj->score) ) {
1488 sarr[si].next = sj; /* next best after new score */
1490 so->next = &sarr[si]; /* prev_best->next points to best */
1491 else start = &sarr[si]; /* start points to best */
1492 break; /* stop looking */
1494 so = sj; /* previous candidate best */
1496 si++; /* increment to next alignment */
1500 for (i = 0 ; i < si ; i++) {
1501 free(sarr[i].tat->probs);
1508 aa0tip = f_str->aa0ti; /* point to temporary index */
1509 a_res->min1 = start->vp->start;
1512 for (sj = start; sj != NULL; sj = sj->prev ) {
1513 doff = (int)(aa0p-aa0) - (sj->vp->start-sj->vp->dp+f_str->noff);
1515 /* fprintf(stderr,"doff: %3d\n",doff); */
1517 for (dx=sj->vp->start,sx=sj->vp->start-sj->vp->dp+f_str->noff;
1518 dx <= sj->vp->stop; dx++) {
1519 *aa0tip++ = f_str->aa0i[sx]; /* save index */
1520 *aa0p++ = f_str->aa0t[sx++]; /* save sequence at index */
1525 if (sj->prev != NULL) {
1526 if (sj->prev->vp->start - sj->vp->stop - 1 > 0 )
1527 tres += res[nres++] = (sj->prev->vp->start - sj->vp->stop - 1);
1531 fprintf(stderr,"t0: %3d, tx: %3d, l0: %3d, lx: %3d, dp: %3d noff: %3d, score: %3d\n",
1532 sj->vp->start - sj->vp->dp + f_str->noff,
1533 sj->vp->stop - sj->vp->dp + f_str->noff,
1534 sj->vp->start,sj->vp->stop,sj->vp->dp,
1535 f_str->noff,sj->vp->score);
1537 fprintf(stderr,"%3d - %3d: %3d\n",
1538 sj->vp->start,sj->vp->stop,sj->vp->score);
1540 a_res->max1 = sj->vp->stop+1;
1541 a_res->max0 = a_res->max1 - sj->vp->dp + f_str->noff;
1545 fprintf(stderr,"(%3d - %3d):(%3d - %3d)\n",
1546 a_res->min0,a_res->max0,a_res->min1,a_res->max1);
1549 /* now replace f_str->aa0t with aa0
1550 (f_str->aa0t is permanent, aa0 is not)*/
1551 for (i=0; i<n0; i++) f_str->aa0t[i] = aa0[i];
1556 /* for fasts (and fastf), pre_cons needs to set up f_str as well as do
1557 necessary translations - for right now, simply do do_walign */
1560 pre_cons(const unsigned char *aa1, int n1, int frame, struct f_struct *f_str) {
1563 f_str->n10=aatran(aa1,f_str->aa1x,n1,frame);
1568 /* aln_func_vals - set up aln.qlfact, qlrev, llfact, llmult, frame, llrev */
1569 /* call from calcons, calc_id, calc_code */
1571 aln_func_vals(int frame, struct a_struct *aln) {
1576 aln->llfact = aln->llmult = 3;
1577 if (frame > 3) aln->llrev = 1;
1578 else aln->llrev = 0;
1581 aln->llfact = aln->llmult = aln->qlfact = 1;
1582 aln->llrev = aln->qlrev = 0;
1589 int calcons(const unsigned char *aa0, int n0,
1590 const unsigned char *aa1, int n1,
1592 struct a_struct *aln,
1593 struct a_res_str a_res,
1595 char *seqc0, char *seqc1, char *seqca,
1596 struct f_struct *f_str)
1598 int i0, i1, nn1, n0t;
1599 int op, lenc, len_gap, nd, ns, itmp;
1600 const unsigned char *aa1p;
1601 char *sp0, *sp1, *spa;
1613 aln->amin0 = a_res.min0;
1614 aln->amin1 = a_res.min1;
1615 aln->amax0 = a_res.max0;
1616 aln->amax1 = a_res.max1;
1618 /* first fill in the ends */
1619 n0 -= (f_str->nm0-1);
1621 if (min(a_res.min0,a_res.min1)<aln->llen || aln->showall==1)
1622 /* will we show all the start ?*/
1623 if (a_res.min0>=a_res.min1) { /* aa0 extends more to left */
1625 if (aln->showall==1) mins=a_res.min0;
1626 else mins = min(a_res.min0,aln->llen/2);
1627 aancpy(seqc0,(char *)f_str->aa0t+a_res.min0-mins,mins,pst);
1628 aln->smin0 = a_res.min0-mins;
1629 if ((mins-a_res.min1)>0) {
1630 memset(seqc1,' ',mins-a_res.min1);
1631 aancpy(seqc1+mins-a_res.min1,(char *)aa1p,a_res.min1,pst);
1635 aancpy(seqc1,(char *)aa1p+a_res.min1-mins,mins,pst);
1636 aln->smin1 = a_res.min1-mins;
1641 if (aln->showall == 1) mins=a_res.min1;
1642 else mins = min(a_res.min1,aln->llen/2);
1643 aancpy(seqc1,(char *)(aa1p+a_res.min1-mins),mins,pst);
1644 aln->smin1 = a_res.min1-mins;
1645 if ((mins-a_res.min0)>0) {
1646 memset(seqc0,' ',mins-a_res.min0);
1647 aancpy(seqc0+mins-a_res.min0,(char *)f_str->aa0t,a_res.min0,pst);
1651 aancpy(seqc0,(char *)f_str->aa0t+a_res.min0-mins,mins,pst);
1652 aln->smin0 = a_res.min0-mins;
1656 mins= min(aln->llen/2,min(a_res.min0,a_res.min1));
1658 aln->smin0=a_res.min0;
1659 aln->smin1=a_res.min1;
1660 aancpy(seqc0,(char *)f_str->aa0t+a_res.min0-mins,mins,pst);
1661 aancpy(seqc1,(char *)aa1p+a_res.min1-mins,mins,pst);
1664 memset(seqca,M_BLANK,mins);
1666 /* now get the middle */
1672 n0t = lenc = len_gap = aln->nident = aln->nsim = aln->ngap_q = aln->ngap_l = op = 0;
1676 /* op is the previous "match/insert" operator; *rp is the current
1677 operator or repeat count */
1679 while (i0 < a_res.max0 || i1 < a_res.max1) {
1680 if (op == 0 && *rp == 0) { /* previous was match (or start), current is match */
1681 op = *rp++; /* get the next match/insert operator */
1683 /* get the alignment symbol */
1684 if ((itmp=pst.pam2[0][f_str->aa0t[i0]][aa1p[i1]])<0) { *spa = M_NEG; }
1685 else if (itmp == 0) { *spa = M_ZERO;}
1686 else {*spa = M_POS;}
1687 if (*spa == M_ZERO || *spa == M_POS) { aln->nsim++;}
1689 *sp0 = pst.sq[f_str->aa0t[i0++]]; /* get the residues for the consensus */
1690 *sp1 = pst.sq[aa1p[i1++]];
1693 if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}
1694 sp0++; sp1++; spa++;
1696 else { /* either op != 0 (previous was insert) or *rp != 0
1697 (current is insert) */
1698 if (op==0) { op = *rp++;} /* previous was match, start insert */
1699 /* previous was insert - count through gap */
1701 *sp1++ = pst.sq[aa1p[i1++]];
1711 /* now we have the middle, get the right end */
1713 ns = mins + lenc + aln->llen;
1714 ns -= (itmp = ns %aln->llen);
1715 if (itmp>aln->llen/2) ns += aln->llen;
1716 nd = ns - (mins+lenc);
1717 if (nd > max(n0t-a_res.max0,nn1-a_res.max1)) nd = max(n0t-a_res.max0,nn1-a_res.max1);
1719 if (aln->showall==1) {
1720 nd = max(n0t-a_res.max0,nn1-a_res.max1); /* reset for showall=1 */
1722 aancpy(seqc0+mins+lenc,(char *)f_str->aa0t+a_res.max0,n0t-a_res.max0,pst);
1723 aancpy(seqc1+mins+lenc,(char *)aa1p+a_res.max1,nn1-a_res.max1,pst);
1724 /* fill with blanks - this is required to use one 'nc' */
1725 memset(seqc0+mins+lenc+n0t-a_res.max0,' ',nd-(n0t-a_res.max0));
1726 memset(seqc1+mins+lenc+nn1-a_res.max1,' ',nd-(nn1-a_res.max1));
1729 if ((nd-(n0t-a_res.max0))>0) {
1730 aancpy(seqc0+mins+lenc,(char *)f_str->aa0t+a_res.max0,
1731 n0t-a_res.max0,pst);
1732 memset(seqc0+mins+lenc+n0t-a_res.max0,' ',nd-(n0t-a_res.max0));
1734 else aancpy(seqc0+mins+lenc,(char *)f_str->aa0t+a_res.max0,nd,pst);
1735 if ((nd-(nn1-a_res.max1))>0) {
1736 aancpy(seqc1+mins+lenc,(char *)aa1p+a_res.max1,nn1-a_res.max1,pst);
1737 memset(seqc1+mins+lenc+nn1-a_res.max1,' ',nd-(nn1-a_res.max1));
1739 else aancpy(seqc1+mins+lenc,(char *)aa1p+a_res.max1,nd,pst);
1742 return mins+lenc+nd;
1746 calcons_a(const unsigned char *aa0, unsigned char *aa0a, int n0,
1747 const unsigned char *aa1, int n1,
1749 struct a_struct *aln,
1750 struct a_res_str a_res,
1752 char *seqc0, char *seqc0a, char *seqc1, char *seqca,
1753 char *ann_arr, struct f_struct *f_str)
1755 int i0, i1, nn1, n0t;
1756 int op, lenc, len_gap, nd, ns, itmp, p_ac, fnum, o_fnum;
1757 const unsigned char *aa1p;
1758 unsigned char *aa0ap;
1759 char *sp0, *sp0a, *sp1, *spa;
1771 aln->amin0 = a_res.min0;
1772 aln->amin1 = a_res.min1;
1773 aln->amax0 = a_res.max0;
1774 aln->amax1 = a_res.max1;
1776 /* first fill in the ends */
1777 n0 -= (f_str->nm0-1);
1779 if (min(a_res.min0,a_res.min1)<aln->llen || aln->showall==1)
1780 /* will we show all the start ?*/
1781 if (a_res.min0>=a_res.min1) { /* aa0 extends more to left */
1783 if (aln->showall==1) mins=a_res.min0;
1784 else mins = min(a_res.min0,aln->llen/2);
1785 aancpy(seqc0,(char *)f_str->aa0t+a_res.min0-mins,mins,pst);
1786 aln->smin0 = a_res.min0-mins;
1787 if ((mins-a_res.min1)>0) {
1788 memset(seqc1,' ',mins-a_res.min1);
1789 aancpy(seqc1+mins-a_res.min1,(char *)aa1p,a_res.min1,pst);
1793 aancpy(seqc1,(char *)aa1p+a_res.min1-mins,mins,pst);
1794 aln->smin1 = a_res.min1-mins;
1799 if (aln->showall == 1) mins=a_res.min1;
1800 else mins = min(a_res.min1,aln->llen/2);
1801 aancpy(seqc1,(char *)(aa1p+a_res.min1-mins),mins,pst);
1802 aln->smin1 = a_res.min1-mins;
1803 if ((mins-a_res.min0)>0) {
1804 memset(seqc0,' ',mins-a_res.min0);
1805 aancpy(seqc0+mins-a_res.min0,(char *)f_str->aa0t,a_res.min0,pst);
1809 aancpy(seqc0,(char *)f_str->aa0t+a_res.min0-mins,mins,pst);
1810 aln->smin0 = a_res.min0-mins;
1814 mins= min(aln->llen/2,min(a_res.min0,a_res.min1));
1816 aln->smin0=a_res.min0;
1817 aln->smin1=a_res.min1;
1818 aancpy(seqc0,(char *)f_str->aa0t+a_res.min0-mins,mins,pst);
1819 aancpy(seqc1,(char *)aa1p+a_res.min1-mins,mins,pst);
1822 memset(seqca,M_BLANK,mins);
1823 memset(seqc0a,' ', mins);
1825 /* now get the middle */
1832 n0t=lenc=len_gap=aln->nident=aln->nsim=aln->ngap_q=aln->ngap_l=op=p_ac= 0;
1836 /* op is the previous "match/insert" operator; *rp is the current
1837 operator or repeat count */
1839 o_fnum = f_str->aa0ti[i0];
1840 aa0ap = &aa0a[f_str->nmoff[o_fnum]+i0];
1842 while (i0 < a_res.max0 || i1 < a_res.max1) {
1843 fnum = f_str->aa0ti[i0];
1844 if (op == 0 && *rp == 0) { /* previous was match (or start), current is match */
1845 if (p_ac == 0) { /* previous code was a match */
1846 if (fnum != o_fnum) { /* continuing a match, but with a different fragment */
1847 aa0ap = &aa0a[f_str->nmoff[fnum]];
1852 p_ac = 0; o_fnum = fnum = f_str->aa0ti[i0];
1853 aa0ap = &aa0a[f_str->nmoff[fnum]];
1855 op = *rp++; /* get the next match/insert operator */
1857 /* get the alignment symbol */
1858 if ((itmp=pst.pam2[0][f_str->aa0t[i0]][aa1p[i1]])<0) { *spa = M_NEG; }
1859 else if (itmp == 0) { *spa = M_ZERO;}
1860 else {*spa = M_POS;}
1861 if (*spa == M_ZERO || *spa == M_POS) { aln->nsim++;}
1863 *sp0 = pst.sq[f_str->aa0t[i0++]]; /* get the residues for the consensus */
1864 *sp0a++ = ann_arr[*aa0ap++];
1865 *sp1 = pst.sq[aa1p[i1++]];
1868 if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}
1869 sp0++; sp1++; spa++;
1871 else { /* either op != 0 (previous was insert) or *rp != 0
1872 (current is insert) */
1873 if (op==0) { op = *rp++;} /* previous was match, start insert */
1874 /* previous was insert - count through gap */
1876 p_ac = 1; fnum = f_str->aa0ti[i0];
1880 *sp1++ = pst.sq[aa1p[i1++]];
1889 *sp0a = *spa = '\0';
1891 /* now we have the middle, get the right end */
1893 ns = mins + lenc + aln->llen;
1894 ns -= (itmp = ns %aln->llen);
1895 if (itmp>aln->llen/2) ns += aln->llen;
1896 nd = ns - (mins+lenc);
1897 if (nd > max(n0t-a_res.max0,nn1-a_res.max1)) nd = max(n0t-a_res.max0,nn1-a_res.max1);
1899 if (aln->showall==1) {
1900 nd = max(n0t-a_res.max0,nn1-a_res.max1); /* reset for showall=1 */
1902 aancpy(seqc0+mins+lenc,(char *)f_str->aa0t+a_res.max0,n0t-a_res.max0,pst);
1903 aancpy(seqc1+mins+lenc,(char *)aa1p+a_res.max1,nn1-a_res.max1,pst);
1904 /* fill with blanks - this is required to use one 'nc' */
1905 memset(seqc0+mins+lenc+n0t-a_res.max0,' ',nd-(n0t-a_res.max0));
1906 memset(seqc1+mins+lenc+nn1-a_res.max1,' ',nd-(nn1-a_res.max1));
1909 if ((nd-(n0t-a_res.max0))>0) {
1910 aancpy(seqc0+mins+lenc,(char *)f_str->aa0t+a_res.max0,
1911 n0t-a_res.max0,pst);
1912 memset(seqc0+mins+lenc+n0t-a_res.max0,' ',nd-(n0t-a_res.max0));
1914 else aancpy(seqc0+mins+lenc,(char *)f_str->aa0t+a_res.max0,nd,pst);
1915 if ((nd-(nn1-a_res.max1))>0) {
1916 aancpy(seqc1+mins+lenc,(char *)aa1p+a_res.max1,nn1-a_res.max1,pst);
1917 memset(seqc1+mins+lenc+nn1-a_res.max1,' ',nd-(nn1-a_res.max1));
1919 else aancpy(seqc1+mins+lenc,(char *)aa1p+a_res.max1,nd,pst);
1921 return mins+lenc+nd;
1924 void aaptrshuffle(unsigned char *res, int n) {
1929 for( i = n; --i; ) {
1931 /* j = nrand(i); if (i == j) continue; */ /* shuffle */
1932 j = (n - 1) - i; if (i <= j ) break; /* reverse */
1940 void aa0shuffle(unsigned char *aa0, int n0, struct f_struct *f_str) {
1945 for(i = 0 ; i < f_str->nm0 ; i++) { /* for each fragment */
1947 aaptrshuffle(&(aa0[f_str->nmoff[i]]),
1948 f_str->nmoff[i+1] - f_str->nmoff[i] - 1 );
1954 /* build an array of match/ins/del - length strings */
1956 calc_code(const unsigned char *aa0, const int n0,
1957 const unsigned char *aa1, const int n1,
1958 struct a_struct *aln,
1959 struct a_res_str a_res,
1961 char *al_str, int al_str_n, struct f_struct *f_str)
1964 int op, lenc, len_gap;
1966 const unsigned char *aa1p;
1971 int o_fnum,fnum = 0;
1973 if (pst.ext_sq_set) {sq = pst.sqx;}
1984 aln->amin0 = a_res.min0;
1985 aln->amin1 = a_res.min1;
1986 aln->amax0 = a_res.max0;
1987 aln->amax1 = a_res.max1;
1990 lenc = len_gap =aln->nident=aln->nsim=aln->ngap_q=aln->ngap_l=aln->nfs=op=p_ac = 0;
1993 i0 = a_res.min0; /* start in aa0 (f_str->aa0t) */
1994 i1 = a_res.min1; /* start in aa1 */
1997 o_fnum = f_str->aa0ti[i0] + 1; /* fragment number */
1998 while (i0 < a_res.max0 || i1 < a_res.max1) {
1999 fnum = f_str->aa0ti[i0]+1;
2000 if (op == 0 && *rp == 0) { /* previous was match, this is match */
2001 if (p_ac == 0) { /* previous code was a match */
2002 if (fnum == o_fnum) { op_cnt++;}
2003 else { /* continuing a match, but with a different fragment */
2004 update_code(al_str,al_str_n-strlen(al_str), p_ac, op_cnt, o_fnum);
2010 update_code(al_str,al_str_n-strlen(al_str),p_ac,op_cnt,o_fnum);
2011 op_cnt = 1; p_ac = 0; o_fnum = fnum = f_str->aa0ti[i0] + 1;
2015 if (pst.pam2[0][f_str->aa0t[i0]][aa1p[i1]]>=0) {aln->nsim++;}
2016 sp0 = pst.sq[f_str->aa0t[i0++]];
2017 sp1 = pst.sq[aa1p[i1++]];
2018 if (toupper(sp0) == toupper(sp1)) aln->nident++;
2021 if (op==0) op = *rp++;
2022 if (p_ac == 1) { op_cnt++;}
2024 update_code(al_str,al_str_n - strlen(al_str),p_ac,op_cnt,o_fnum);
2025 op_cnt = 1; p_ac = 1; fnum = f_str->aa0ti[i0] + 1;
2027 op--; lenc++; i1++; len_gap++;
2030 update_code(al_str,al_str_n - strlen(al_str),p_ac,op_cnt,o_fnum);
2032 return lenc - len_gap;
2035 /* update_code(): if "op" == 0, this is the end of a match of length
2036 "op_cnt" involving fragment "fnum"
2037 otherwise, this is an insertion (op==1) or deletion (op==2)
2041 update_code(char *al_str, int al_str_max, int op, int op_cnt, int fnum) {
2043 char op_char[4]={"=-+"};
2047 sprintf(tmp_cnt,"%c%d[%d]",op_char[op],op_cnt,fnum);
2049 sprintf(tmp_cnt,"%c%d",op_char[op],op_cnt);
2051 strncat(al_str,tmp_cnt,al_str_max);
2055 calc_id(const unsigned char *aa0, int n0,
2056 const unsigned char *aa1, int n1,
2057 struct a_struct *aln,
2058 struct a_res_str a_res,
2060 struct f_struct *f_str)
2063 int op, lenc, len_gap;
2064 const unsigned char *aa1p;
2077 aln->amin0 = a_res.min0;
2078 aln->amin1 = a_res.min1;
2079 aln->amax0 = a_res.max0;
2080 aln->amax1 = a_res.max1;
2082 /* first fill in the ends */
2083 n0 -= (f_str->nm0-1);
2085 /* now get the middle */
2087 lenc=len_gap=aln->nident=aln->nsim=aln->ngap_q = aln->ngap_l = aln->nfs = op = 0;
2091 while (i0 < a_res.max0 || i1 < a_res.max1) {
2092 if (op == 0 && *rp == 0) {
2095 if (pst.pam2[0][f_str->aa0t[i0]][aa1p[i1]]>=0) {aln->nsim++;}
2097 sp0 = pst.sq[f_str->aa0t[i0++]];
2098 sp1 = pst.sq[aa1p[i1++]];
2100 if (toupper(sp0) == toupper(sp1)) aln->nident++;
2103 if (op==0) { op = *rp++;}
2110 return lenc-len_gap;
2115 #include "structs.h"
2119 update_params(struct qmng_str *qm_msg,
2120 struct mngmsg *m_msg, struct pstruct *ppst)
2122 m_msg->n0 = ppst->n0 = qm_msg->n0;
2123 m_msg->nm0 = qm_msg->nm0;
2124 m_msg->escore_flg = qm_msg->escore_flg;
2125 m_msg->qshuffle = qm_msg->qshuffle;