--- /dev/null
+
+/* copyright (c) 1998, 1999 William R. Pearson and the U. of Virginia */
+
+/* $Name: fa_34_26_5 $ - $Id: dropfz2.c,v 1.57 2007/04/26 18:37:19 wrp Exp $ */
+
+/* 18-Sept-2006 - removed static global variables for alignment */
+
+/* 2002/06/23 finally correctly implement fix to translate 'N' to 'X' */
+
+/* 1999/11/29 modification by Z. Zhang to translate DNA 'N' as 'X' */
+
+/* implements an improved version of the fasty algorithm, see:
+
+ W. R. Pearson, T. Wood, Z. Zhang, A W. Miller (1997) "Comparison of
+ DNA sequences with protein sequences" Genomics 46:24-36
+
+*/
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include <ctype.h>
+
+#include "defs.h"
+#include "param.h"
+#define XTERNAL
+#include "upam.h"
+#include "uascii.h"
+
+/* index into nt[] that must hold 'N'; init_work() exits if it does not */
+#define NT_N 16
+
+/* globals for fasta */
+#define MAXWINDOW 64
+
+/* number of entries in the vmax[]/vptr[] tables of struct f_struct;
+ may be overridden at build time */
+#ifndef MAXSAV
+#define MAXSAV 10
+#endif
+
+/* version string reported by get_param(); the "an0" form marks an ALLOCN0 build */
+#ifndef ALLOCN0
+static char *verstr="3.5 Sept 2006";
+#else
+static char *verstr="3.5an0 Sept 2006";
+#endif
+
+/* Running state of one diagonal during the hash scan in do_fasta(); the
+ entries live in f_struct.diag[]. do_fasta() resets an entry by setting
+ stop = -1, dmax = NULL, score = 0, and treats stop >= 0 as "run in progress". */
+struct dstruct /* diagonal structure for saving current run */
+{
+ int score; /* hash score of current match */
+ int start; /* start of current match */
+ int stop; /* end of current match */
+ struct savestr *dmax; /* location in vmax[] where best score data saved */
+};
+
+/* One saved run. savemax() records score/dp/start/stop from a diagonal;
+ spam() later rescans the run and rewrites start/stop, and do_fasta() stores
+ spam()'s return value in score. */
+struct savestr
+{
+ int score; /* pam score with segment optimization */
+ int score0; /* pam score of best single segment */
+ int gscore; /* score from global match */
+ int dp; /* diagonal of match */
+ int start; /* start of match in lib seq */
+ int stop; /* end of match in lib seq */
+};
+
+/* old-style (unprototyped) forward declarations; both functions are defined
+ later in this file */
+void savemax();
+void kpsort();
+
+/* cell type whose fields init_row() sets to zero */
+struct sx_s {int C1, C2, C3, I1, I2, I3, flag; };
+
+/* scores stored per (residue, 4-nucleotide code) entry by init_weights() */
+struct wgt { int iii, ii, iv;};
+/* characters stored next to the wgt scores by init_weights() when a
+ character table is requested */
+struct wgtc {char c2, c3, c4, c5;};
+
+/* note: st_ptr is a pointer type (struct st_s *) */
+typedef struct st_s { int C, I, D;} *st_ptr;
+
+/* Per-query work area. init_work() allocates it with calloc() and fills the
+ hash tables, weight tables, waa and res; close_work() frees it. The
+ aa0x/aa0v (or aa1x/aa1v) pointers are stored one element past the start of
+ their allocation, which is why close_work() decrements them before free(). */
+struct f_struct {
+ struct dstruct *diag;
+ struct savestr vmax[MAXSAV]; /* best matches saved for one sequence */
+ struct savestr *vptr[MAXSAV];
+ struct savestr *lowmax;
+ int ndo;
+ int noff;
+ int hmask; /* hash constants */
+ int *pamh1; /* pam based array */
+ int *pamh2; /* pam based kfact array */
+ int *link, *harr; /* hash arrays */
+ int kshft; /* shift width */
+ int nsav, lowscor; /* number of saved runs, worst saved run */
+#ifndef TFAST
+ unsigned char *aa0x, *aa0v; /* aa0x - 111122223333 */
+#else
+ unsigned char *aa1x, *aa1v; /* aa1x - 111122223333 */
+#endif /* aa1v - computed codons */
+ struct sx_s *cur;
+ struct wgt **weight0;
+ struct wgt **weight1;
+ struct wgtc **weight_c;
+ int *waa;
+ int *res;
+ int max_res;
+ st_ptr up, down, tp;
+};
+
+/* DROP_INTERN is defined before including drop_func.h */
+#define DROP_INTERN
+#include "drop_func.h"
+
+/* prototypes for functions used before their definition; w_abort() and
+ aagetmap() are declared extern and are not defined in the visible part of
+ this file */
+static int dmatchx(const unsigned char *aa0, int n0,
+ const unsigned char *aa1, int n1,
+ int hoff, int window,
+ int **pam2, int gdelval, int ggapval, int gshift,
+ struct f_struct *f_str);
+
+int shscore(unsigned char *aa0, int n0, int **pam2);
+int saatran(const unsigned char *ntseq, unsigned char *aaseq, int maxs, int frame);
+int spam (const unsigned char *aa0, const unsigned char *aa1,
+ struct savestr *dmax, int **pam2,
+ struct f_struct *f_str);
+int sconn (struct savestr **v, int n,int cgap, int pgap, struct f_struct *f_str);
+int lx_band(const unsigned char *prot_seq, int len_prot,
+ const unsigned char *dna_prot_seq, int len_dna_prot,
+ int **pam_matrix, int gopen, int gext,
+ int gshift, int start_diag, int width, struct f_struct *f_str);
+static void update_code(char *al_str, int al_str_max, int op, int op_cnt, char *op_char);
+extern void w_abort (char *p, char *p1);
+extern void aagetmap(char *to, int n);
+
+/* initialize for fasta */
+/* modified 30-August-1999 by Zheng Zhang to work with an extended alphabet */
+/* Assume naa=47, and wgts[47][23] matches both upper and lower case
+amino acids with another amino acid. Also assume the DNA letters
+do not have an upper/lower case difference. If DNA sequences are also
+allowed to use upper/lower case letters, more needs to be changed: not
+only here, but also in the alignment code, where the way a codon is
+packed into a number between 0-63 needs to be changed. */
+
+/* modified so that if **weightci==NULL, do not fiddle with characters */
+
+/*
+ * init_weights - build the codon scoring tables for every residue 0..naa.
+ *
+ *  weighti   out: receives a (naa+1) x 256 table of struct wgt
+ *  weightci  out: if non-NULL, receives a (naa+1) x 256 table of struct wgtc
+ *            holding the characters that go with each score; if NULL no
+ *            character table is built
+ *  wgts      scoring matrix; rows 0..naa are read, columns are indexed by
+ *            aascii[] of the characters returned by aagetmap() and by 23
+ *  gshift    subtracted from the .ii and .iv scores
+ *  gsubs     subtracted once per differing base when substituting a codon
+ *  naa       highest residue index
+ *
+ * Exits the process if an allocation fails.  The tables are released with
+ * free_weights().
+ */
+void
+init_weights(struct wgt ***weighti, struct wgtc ***weightci,
+             int **wgts, int gshift, int gsubs, int naa)
+{
+  int i, j, do_wgtc=0;
+  int aa, b, a, x, y, z;
+  int *wwt, e;
+  struct wgt **weight;
+  struct wgtc **weightc = NULL;
+  char aacmap[64];
+  /* The scratch tables are sized from naa instead of a fixed 49 rows, so a
+     larger alphabet cannot overrun them. */
+  int (*temp)[64];	/* temp[aa][codon]: best score for codon vs. aa */
+  char (*le)[64];	/* le[aa][codon]: codon that gave temp[aa][codon] */
+
+  if ((*weighti=(struct wgt **)calloc((size_t)(naa+1),sizeof(struct wgt *)))
+      ==NULL) {
+    fprintf(stderr," cannot allocate weights array: %d\n",naa);
+    exit(1);
+  }
+
+  weight = *weighti;
+  for (aa=0; aa <= naa; aa++) {
+    if ((weight[aa]=(struct wgt *)calloc((size_t)256,sizeof(struct wgt)))
+	==NULL) {
+      fprintf(stderr," cannot allocate weight[]: %d/%d\n",aa,naa);
+      exit(1);
+    }
+  }
+
+  if (weightci !=NULL) {
+    if ((*weightci=(struct wgtc **)calloc((size_t)(naa+1),
+					  sizeof(struct wgtc *)))==NULL) {
+      fprintf(stderr," cannot allocate weight_c array: %d\n",naa);
+      exit(1);
+    }
+    weightc = *weightci;
+
+    for (aa=0; aa <= naa; aa++) {
+      if ((weightc[aa]=(struct wgtc *)calloc((size_t)256,sizeof(struct wgtc)))
+	  ==NULL) {
+	fprintf(stderr," cannot allocate weightc[]: %d/%d\n",aa,naa);
+	exit(1);
+      }
+    }
+    do_wgtc = 1;
+  }
+  else do_wgtc = 0;
+
+  temp = (int (*)[64])calloc((size_t)(naa+1),sizeof(*temp));
+  le = (char (*)[64])calloc((size_t)(naa+1),sizeof(*le));
+  if (temp == NULL || le == NULL) {
+    fprintf(stderr," cannot allocate codon score tables: %d\n",naa);
+    exit(1);
+  }
+
+  aagetmap(aacmap,64);
+
+  /* Pass 1: for every residue and every codon i, find the codon j that
+     maximizes (matrix score of j's character) minus gsubs for each of the
+     three base positions where i and j differ. */
+  for (aa = 0; aa <= naa; aa++) {
+    wwt = wgts[aa];
+    for (i = 0; i < 64; i++) {	/* i iterates through the codons */
+      x = -1000;
+      y = i;
+      for (j = 0; j < 64; j++) {	/* j iterates through the codons */
+	z = ((~i & j) | (i & ~j));	/* i XOR j: non-zero 2-bit fields differ */
+	b = 0;			/* score = 0 */
+	if (z % 4) b-= gsubs;
+	if (z /16) b-= gsubs;
+	if ((z /4) % 4) b -= gsubs;
+	b += wwt[aascii[aacmap[j]]];  /* add the match score for char j*/
+	if (b > x) {
+	  x = b;		/* x has the score */
+	  y = j;		/* y has the character */
+	}
+      }
+      temp[aa][i] = x;
+      le[aa][i] = y;
+    }
+  }
+
+  /* Pass 2: i is a 4-nucleotide code (2 bits per base). */
+  for (aa= 0; aa <= naa; aa++) {
+    wwt = temp[aa];
+    for (i = 0; i < 256; i++) {
+      /* .iv: best codon obtained by dropping one of the four bases of i */
+      for (x=-100,b = 0; b < 4; b++) {
+	z = (i/ (1 << ((b+1)*2)))*(1<<(b*2))+(i%(1<<(b*2)));
+	if (x < (e=wwt[z])) {
+	  x = e;
+	  if (do_wgtc) weightc[aa][i].c4 = aacmap[le[aa][z]];
+	}
+      }
+      weight[aa][i].iv=x-gshift;
+      /* .iii: the codon held in the low six bits of i */
+      weight[aa][i].iii = wwt[i%64];
+
+      if (do_wgtc) {
+	weightc[aa][i].c5 = aacmap[le[aa][i%64]];
+	weightc[aa][i].c3 = aacmap[i%64];
+      }
+      /* .ii: best codon obtained by inserting any base a at any of the
+	 three positions b of the two bases in the low four bits of i */
+      x = i %16;
+      for (y = -100, b = 0; b < 3; b++) {
+	z = ((x >> (b*2)) << (b*2+2)) + (x % (1 << (b*2)));
+	for (a = 0; a < 4; a++) {
+	  if ((e =wwt[z+(a<<(b*2))]) > y) {
+	    y = e;
+	    if (do_wgtc)
+	      weightc[aa][i].c2 = aacmap[le[aa][z+(a<<(b*2))]];
+	  }
+	}
+      }
+      weight[aa][i].ii = y-gshift;
+    }
+  }
+
+  /* Code 106 (CGGG) is reserved: pre_com()/pre_com_r() emit it for codons
+     that contain an N, so it is scored from column 23 of the matrix. */
+  for (aa = 0; aa <= naa; aa++) {
+    weight[aa][106].iii = wgts[aa][23]; /* is 23 the code for 'X'?*/
+    weight[aa][106].iv = weight[aa][106].ii = weight[aa][106].iii-gshift;
+    if (do_wgtc) {
+      weightc[aa][106].c5 = weightc[aa][106].c4 = weightc[aa][106].c3
+	= weightc[aa][106].c2 = 'X';
+    }
+  }
+
+  free(le);
+  free(temp);
+}
+
+/*
+ * free_weights - release the tables built by init_weights().
+ *
+ *  weighti0, weighti1  addresses of two (naa+1)-row struct wgt tables
+ *  weightci            address of a (naa+1)-row struct wgtc table
+ *  naa                 highest row index; must match the value given to
+ *                      init_weights()
+ *
+ * A NULL argument or a NULL table is skipped.  Each table pointer is set
+ * to NULL after it is freed so the caller is not left with a dangling
+ * pointer.
+ */
+void
+free_weights(struct wgt ***weighti0, struct wgt ***weighti1,
+             struct wgtc ***weightci, int naa)
+{
+  int aa;
+
+  if (weighti0 != NULL && *weighti0 != NULL) {
+    for (aa=0; aa <= naa; aa++) {free((*weighti0)[aa]);}
+    free(*weighti0);
+    *weighti0 = NULL;
+  }
+
+  if (weighti1 != NULL && *weighti1 != NULL) {
+    for (aa=0; aa <= naa; aa++) {free((*weighti1)[aa]);}
+    free(*weighti1);
+    *weighti1 = NULL;
+  }
+
+  if (weightci != NULL && *weightci != NULL) {
+    for (aa=0; aa <= naa; aa++) {free((*weightci)[aa]);}
+    free(*weightci);
+    *weightci = NULL;
+  }
+}
+
+/*
+ * pre_com - precompute the forward 4-nucleotide codes for a DNA sequence.
+ *
+ *  aa0   encoded nucleotide sequence of length n0
+ *  n0    number of nucleotides
+ *  aa0v  out: aa0v[0..n0-3] receive one code per position
+ *
+ * Each code is the low eight bits of the rolling value built from hnt[]
+ * (two bits per base).  A window whose last three bases contain NT_N is
+ * written as 106; a genuine 106 (CGGG) is remapped to 42 (AGGG) so that
+ * 106 stays reserved.  Nothing is written when n0 < 3.
+ */
+static void
+pre_com(const unsigned char *aa0, int n0, unsigned char *aa0v)
+{
+  int dnav, i;
+
+  /* no output is produced for fewer than three bases; returning here also
+     avoids reading aa0[1] of a shorter sequence */
+  if (n0 < 3) return;
+
+  dnav = (hnt[aa0[0]]<<2) + hnt[aa0[1]];
+  for (i=2; i<n0; i++) {
+    dnav = ((dnav<<2)+hnt[aa0[i]])&255;
+    if (aa0[i] == NT_N || aa0[i-1]==NT_N || aa0[i-2] == NT_N)
+      aa0v[i-2] = 106;
+    else {
+      if (dnav == 106/*CGGG*/) dnav = 42/*AGGG*/;
+      aa0v[i-2]=dnav;
+    }
+  }
+}
+
+/*
+ * pre_com_r - as pre_com(), but for the reverse complement.
+ *
+ *  aa0   encoded nucleotide sequence of length n0
+ *  n0    number of nucleotides
+ *  aa0v  out: aa0v[0..n0-3] receive one code per position
+ *
+ * The sequence is walked from its last base to its first and every base
+ * value is complemented as 3 - hnt[].  Windows containing NT_N are written
+ * as 106 and a genuine 106 is remapped to 42.  Nothing is written when
+ * n0 < 3.
+ */
+static void
+pre_com_r(const unsigned char *aa0, int n0, unsigned char *aa0v)
+{
+  int dnav, i, ir;
+
+  /* no output is produced for fewer than three bases; returning here also
+     avoids reading aa0[n0-2], which is aa0[-1] when n0 == 1 */
+  if (n0 < 3) return;
+
+  /* '-' binds tighter than '<<'; the parentheses only make that explicit */
+  dnav = ((3-hnt[aa0[n0-1]])<<2) + 3-hnt[aa0[n0-2]];
+  for (i=2, ir=n0-3; i<n0; i++,ir--) {
+    dnav = ((dnav<<2)+3-hnt[aa0[ir]])&255;
+    if (aa0[ir] == NT_N || aa0[ir+1]==NT_N || aa0[ir+2] == NT_N)
+      aa0v[i-2] = 106;
+    else {
+      if (dnav == 106) dnav = 42;
+      aa0v[i-2]=dnav;
+    }
+  }
+}
+
+/*
+ * init_work - allocate and fill the per-query work area.
+ *
+ *  aa0    query sequence of length n0
+ *  n0     query length
+ *  ppst   search parameters; param_u.fa.cgap, .optcut (unless optcut_set
+ *         is 1) and .pgap are written here
+ *  f_arg  out: receives the newly allocated struct f_struct
+ *
+ * Without TFAST the query is treated as DNA: its codon codes (aa0v) and its
+ * three-frame translation (aa0x) are computed, and the translation is what
+ * gets hashed.  With TFAST the query is hashed directly and buffers of
+ * ppst->maxlen+4 bytes are reserved for the library translation.
+ *
+ * Any allocation failure prints a message and exits the process.  The work
+ * area is released with close_work().
+ */
+void
+init_work (unsigned char *aa0, int n0,
+           struct pstruct *ppst,
+           struct f_struct **f_arg)
+{
+  int mhv, phv;
+  int hmax;
+  int i0, hv;
+  int pamfact;
+  int btemp;
+  struct f_struct *f_str;
+  /* these used to be globals, but do not need to be */
+  int ktup, fact, kt1, lkt;
+
+  int maxn0;
+  int *pwaa;
+  int i, j;
+  int *waa;
+  int *res;
+  int nsq, ip, *hsq, naat;
+#ifndef TFAST
+  int last_n0, itemp;
+  unsigned char *fs, *aa0x, *aa0v;
+  int n0x, n0x3;
+#endif
+
+  /* pre_com() and pre_com_r() compare sequence bytes against NT_N, so the
+     alphabet must really have 'N' at that index */
+  if (nt[NT_N] != 'N') {
+    fprintf(stderr," nt[NT_N] (%d) != 'N' (%c) - recompile\n",NT_N,nt[NT_N]);
+    exit(1);
+  }
+
+  if (ppst->ext_sq_set) {
+    nsq = ppst->nsqx; ip = 1;
+    hsq = ppst->hsqx;
+  }
+  else {
+    nsq = ppst->nsq; ip = 0;
+    hsq = ppst->hsq;
+  }
+
+  if ((f_str = (struct f_struct *)calloc(1,sizeof(struct f_struct)))==NULL) {
+    fprintf (stderr, " cannot allocate f_struct\n");
+    exit (1);
+  }
+
+  /* derive the join threshold (cgap), the optimization cutoff (optcut)
+     and the join penalty (pgap) from the search parameters */
+  btemp = 2 * ppst->param_u.fa.bestoff / 3 +
+    n0 / ppst->param_u.fa.bestscale +
+    ppst->param_u.fa.bkfact *
+    (ppst->param_u.fa.bktup - ppst->param_u.fa.ktup);
+  btemp = min (btemp, ppst->param_u.fa.bestmax);
+  if (btemp > 3 * n0) btemp = 3 * shscore(aa0,n0,ppst->pam2[0]) / 5;
+
+  ppst->param_u.fa.cgap = btemp + ppst->param_u.fa.bestoff / 3;
+  if (ppst->param_u.fa.optcut_set != 1)
+#ifndef TFAST
+    ppst->param_u.fa.optcut = (btemp*5)/4;
+#else
+    ppst->param_u.fa.optcut = (btemp*4)/3;
+#endif
+
+#ifdef OLD_FASTA_GAP
+  ppst->param_u.fa.pgap = ppst->gdelval + ppst->ggapval;
+#else
+  ppst->param_u.fa.pgap = ppst->gdelval + 2*ppst->ggapval;
+#endif
+  pamfact = ppst->param_u.fa.pamfact;
+  ktup = ppst->param_u.fa.ktup;
+  fact = ppst->param_u.fa.scfact * ktup;
+
+#ifndef TFAST
+  /* before hashing, we must set up some space and translate the sequence */
+
+  maxn0 = n0 + 2;
+  if ((aa0x =(unsigned char *)calloc((size_t)maxn0,
+				     sizeof(unsigned char)))
+      == NULL) {
+    fprintf (stderr, "cannot allocate aa0x array %d\n", maxn0);
+    exit (1);
+  }
+  /* the stored pointer is one past the allocation start; close_work()
+     undoes this before free() */
+  aa0x++;
+  f_str->aa0x = aa0x;
+
+  if ((aa0v =(unsigned char *)calloc((size_t)maxn0,
+				     sizeof(unsigned char)))
+      == NULL) {
+    fprintf (stderr, "cannot allocate aa0v array %d\n", maxn0);
+    exit (1);
+  }
+  aa0v++;
+  f_str->aa0v = aa0v;
+
+  /* make a precomputed codon number series */
+  pre_com(aa0, n0, aa0v);
+
+  /* translate frames 0..2 back to back into aa0x, leaving one byte
+     between consecutive frames */
+  last_n0 = 0;
+  for (itemp=0; itemp<3; itemp++) {
+    n0x=saatran(aa0,&aa0x[last_n0],n0,itemp);
+    last_n0 += n0x+1;
+  }
+
+  n0x = n0;
+  n0x3 = n0x/3;
+
+  /* now switch aa0 and aa0x for hashing functions */
+  fs = aa0;
+  aa0 = aa0x;
+  aa0x = fs;
+#endif
+
+  if (ppst->ext_sq_set) naat = MAXLC;
+  else naat = MAXUC;
+
+  /* weight0 has no character table; weight1 and weight_c are built from
+     pam2[0] */
+  init_weights(&f_str->weight0, NULL,
+	       ppst->pam2[ip],-ppst->gshift,-ppst->gsubs,naat);
+  init_weights(&f_str->weight1, &f_str->weight_c,
+	       ppst->pam2[0],-ppst->gshift,-ppst->gsubs,naat);
+
+  if (pamfact == -1)
+    pamfact = 0;
+  else if (pamfact == -2)
+    pamfact = 1;
+
+  /* largest hash value below NMAP fixes the number of bits per residue */
+  /* NOTE(review): the test reads hsq[] but the assignment and the loop
+     bound use ppst->hsq / ppst->nsq; these differ when ext_sq_set is set -
+     confirm this is intended */
+  for (i0 = 1, mhv = -1; i0 <= ppst->nsq; i0++)
+    if (hsq[i0] < NMAP && hsq[i0] > mhv)
+      mhv = ppst->hsq[i0];
+
+  if (mhv <= 0)
+    {
+      fprintf (stderr, " maximum hsq <=0 %d\n", mhv);
+      exit (1);
+    }
+
+  for (f_str->kshft = 0; mhv > 0; mhv /= 2) f_str->kshft++;
+
+/*      kshft = 2;	*/
+  kt1 = ktup - 1;
+  hv = 1;
+  for (i0 = 0; i0 < ktup; i0++)
+    hv = hv << f_str->kshft;
+  hmax = hv;
+  f_str->hmask = (hmax >> f_str->kshft) - 1;
+
+  if ((f_str->harr = (int *) calloc (hmax, sizeof (int))) == NULL) {
+    fprintf (stderr, " cannot allocate hash array\n");
+    exit (1);
+  }
+  if ((f_str->pamh1 = (int *) calloc (ppst->nsq+1, sizeof (int))) == NULL) {
+    fprintf (stderr, " cannot allocate pamh1 array\n");
+    exit (1);
+  }
+  if ((f_str->pamh2 = (int *) calloc (hmax, sizeof (int))) == NULL) {
+    fprintf (stderr, " cannot allocate pamh2 array\n");
+    exit (1);
+  }
+  if ((f_str->link = (int *) calloc (n0, sizeof (int))) == NULL) {
+    fprintf (stderr, " cannot allocate hash link array");
+    exit (1);
+  }
+
+  /* -1 marks an empty hash bucket and the end of a link chain */
+  for (i0 = 0; i0 < hmax; i0++)
+    f_str->harr[i0] = -1;
+  for (i0 = 0; i0 < n0; i0++)
+    f_str->link[i0] = -1;
+
+  /* encode the aa0 array */
+  phv = hv = 0;
+  lkt = kt1;
+  /* prime hv/phv with the first ktup-1 residues, restarting after any
+     residue whose hash value is >= NMAP */
+  for (i0 = 0; i0 < min(n0,lkt); i0++) {
+    if (hsq[aa0[i0]] >= NMAP) {
+      hv=phv=0; lkt = i0+ktup; continue;
+    }
+    hv = (hv << f_str->kshft) + ppst->hsq[aa0[i0]];
+    phv += ppst->pam2[ip][aa0[i0]][aa0[i0]] * ktup;
+  }
+
+  for (; i0 < n0; i0++) {
+    if (hsq[aa0[i0]] >= NMAP) {
+      hv=phv=0;
+      /* restart hv, phv calculation */
+      /* i0<n0 is tested first so aa0[n0] is never read by the condition */
+      for (lkt = i0+kt1; i0<n0 && (i0 < lkt || hsq[aa0[i0]]>=NMAP); i0++) {
+	if (hsq[aa0[i0]] >= NMAP) {
+	  hv=phv=0;
+	  lkt = i0+ktup;
+	  continue;
+	}
+	hv = (hv << f_str->kshft) + hsq[aa0[i0]];
+	phv += ppst->pam2[ip][aa0[i0]][aa0[i0]]*ktup;
+      }
+    }
+    if (i0 >= n0) break;
+    hv = ((hv & f_str->hmask) << f_str->kshft) + ppst->hsq[aa0[i0]];
+    /* push position i0 onto the chain for hash value hv */
+    f_str->link[i0] = f_str->harr[hv];
+    f_str->harr[hv] = i0;
+    if (pamfact) {
+      f_str->pamh2[hv] = (phv += ppst->pam2[ip][aa0[i0]][aa0[i0]] * ktup);
+      if (hsq[aa0[i0-kt1]] < NMAP)
+	phv -= ppst->pam2[ip][aa0[i0 - kt1]][aa0[i0 - kt1]] * ktup;
+    }
+    else f_str->pamh2[hv] = fact * ktup;
+  }
+
+/* this has been modified from 0..<ppst->nsq to 1..<=ppst->nsq because the
+   pam2[0][0] is now undefined for consistency with blast
+*/
+
+  if (pamfact)
+    for (i0 = 1; i0 <= ppst->nsq; i0++)
+      f_str->pamh1[i0] = ppst->pam2[ip][i0][i0] * ktup;
+  else
+    for (i0 = 1; i0 <= ppst->nsq; i0++)
+      f_str->pamh1[i0] = fact;
+
+  f_str->ndo = 0;	/* used to save time on diagonals with long queries */
+
+#ifndef ALLOCN0
+  if ((f_str->diag = (struct dstruct *) calloc ((size_t)MAXDIAG,
+						sizeof (struct dstruct)))==NULL) {
+    fprintf (stderr," cannot allocate diagonal arrays: %lu\n",
+	     (unsigned long)(MAXDIAG *sizeof (struct dstruct)));
+    exit (1);
+  };
+#else
+  if ((f_str->diag = (struct dstruct *) calloc ((size_t)n0,
+						sizeof (struct dstruct)))==NULL) {
+    fprintf (stderr," cannot allocate diagonal arrays: %ld\n",
+	     (long)(n0*sizeof (struct dstruct)));
+    exit (1);
+  };
+#endif
+
+#ifndef TFAST
+  /* done hashing, now switch aa0, aa0x back */
+  fs = aa0;
+  aa0 = aa0x;
+  aa0x = fs;
+#else
+  if ((f_str->aa1x =(unsigned char *)calloc((size_t)ppst->maxlen+4,
+					    sizeof(unsigned char)))
+      == NULL) {
+    fprintf (stderr, "cannot allocate aa1x array %d\n", ppst->maxlen+4);
+    exit (1);
+  }
+  f_str->aa1x++;
+
+  if ((f_str->aa1v =(unsigned char *)calloc((size_t)ppst->maxlen+4,
+					    sizeof(unsigned char))) == NULL) {
+    fprintf (stderr, "cannot allocate aa1v array %d\n", ppst->maxlen+4);
+    exit (1);
+  }
+  f_str->aa1v++;
+
+#endif
+
+  /* waa[i*n0 + j] = pam2[ip][i][aa0[j]]: the score of residue i against
+     every query position, stored row by row */
+  if ((waa= (int *)malloc (sizeof(int)*(ppst->nsq+1)*n0)) == NULL) {
+    fprintf(stderr,"cannot allocate waa struct %3d\n",ppst->nsq*n0);
+    exit(1);
+  }
+
+  pwaa = waa;
+  for (i=0; i<=ppst->nsq; i++) {
+    for (j=0;j<n0; j++) {
+      *pwaa = ppst->pam2[ip][i][aa0[j]];
+      pwaa++;
+    }
+  }
+  f_str->waa = waa;
+
+#ifndef TFAST
+  maxn0 = max(2*n0,MIN_RES);
+#else
+  maxn0 = max(4*n0,MIN_RES);
+#endif
+  if ((res = (int *)calloc((size_t)maxn0,sizeof(int)))==NULL) {
+    fprintf(stderr,"cannot allocate alignment results array %d\n",maxn0);
+    exit(1);
+  }
+  f_str->res = res;
+  f_str->max_res = maxn0;
+
+  *f_arg = f_str;
+}
+
+/* get_param - format a description of the search parameters.
+ pstr supplies the values; pstring1 always receives the text, and
+ pstring2 receives the "; pg_..." form only when it is not NULL.
+ The writes use sprintf()/strcat(), so nothing here bounds the output
+ to the size of either buffer. */
+/* pstring1 is a message to the manager, currently 512 */
+/* pstring2 is the same information, but in a markx==10 format */
+void
+get_param (struct pstruct *pstr, char *pstring1, char *pstring2)
+{
+#ifndef TFAST
+ char *pg_str="FASTY";
+#else
+ char *pg_str="TFASTY";
+#endif
+
+ /* the two branches differ in that the optimized form also reports optcut;
+ OLD_FASTA_GAP only changes the label text (gap-pen vs open/ext) */
+ if (!pstr->param_u.fa.optflag)
+#ifdef OLD_FASTA_GAP
+ sprintf (pstring1, "%s (%s) function [%s matrix (%d:%d)%s] ktup: %d\n join: %d, gap-pen: %d/%d, shift: %d subs: %d width: %3d",pg_str,verstr,
+#else
+ sprintf (pstring1, "%s (%s) function [%s matrix (%d:%d)%s] ktup: %d\n join: %d, open/ext: %d/%d, shift: %d subs: %d width: %3d",pg_str,verstr,
+#endif
+ pstr->pamfile, pstr->pam_h,pstr->pam_l,
+ (pstr->ext_sq_set) ? "xS":"\0",
+ pstr->param_u.fa.ktup, pstr->param_u.fa.cgap,
+ pstr->gdelval, pstr->ggapval, pstr->gshift, pstr->gsubs,
+ pstr->param_u.fa.optwid);
+ else
+#ifdef OLD_FASTA_GAP
+ sprintf (pstring1, "%s (%s) function [optimized, %s matrix (%d:%d)%s] ktup: %d\n join: %d, opt: %d, gap-pen: %3d/%3d shift: %3d, subs: %3d width: %3d",pg_str,verstr,
+#else
+ sprintf (pstring1, "%s (%s) function [optimized, %s matrix (%d:%d)%s] ktup: %d\n join: %d, opt: %d, open/ext: %3d/%3d shift: %3d, subs: %3d width: %3d",pg_str,verstr,
+#endif
+ pstr->pamfile, pstr->pam_h,pstr->pam_l,
+ (pstr->ext_sq_set) ? "xS":"\0",
+ pstr->param_u.fa.ktup, pstr->param_u.fa.cgap,
+ pstr->param_u.fa.optcut, pstr->gdelval, pstr->ggapval,
+ pstr->gshift,pstr->gsubs,pstr->param_u.fa.optwid);
+
+ if (pstr->param_u.fa.iniflag) strcat(pstring1," init1");
+ /*
+ if (pstr->zsflag==0) strcat(pstring1," not-scaled");
+ else if (pstr->zsflag==1) strcat(pstring1," reg.-scaled");
+ */
+
+ /* second form: one "; pg_xxx: value" item per line */
+ if (pstring2 != NULL) {
+#ifdef OLD_FASTA_GAP
+ sprintf (pstring2, "; pg_name: %s\n; pg_ver: %s\n; pg_matrix: %s (%d:%d)%s\n\
+; pg_gap-pen: %d %d\n; pg_ktup: %d\n; pg_optcut: %d\n; pg_cgap: %d\n",
+#else
+ sprintf (pstring2, "; pg_name: %s\n; pg_ver: %s\n; pg_matrix: %s (%d:%d)%s\n\
+; pg_open-ext: %d %d\n; pg_ktup: %d\n; pg_optcut: %d\n; pg_cgap: %d\n",
+#endif
+ pg_str,verstr,pstr->pamfile, pstr->pam_h,pstr->pam_l,
+ (pstr->ext_sq_set) ? "xS":"\0", pstr->gdelval,
+ pstr->ggapval,pstr->param_u.fa.ktup,pstr->param_u.fa.optcut,
+ pstr->param_u.fa.cgap);
+ }
+}
+
+/*
+ * close_work - release everything hanging off *f_arg and clear *f_arg.
+ *
+ *  aa0, n0  not used here
+ *  ppst     only ext_sq_set is read, to pick the weight-table row count
+ *  f_arg    address of the work-area pointer; a NULL work area is a no-op
+ */
+void
+close_work (const unsigned char *aa0, int n0,
+            struct pstruct *ppst,
+            struct f_struct **f_arg)
+{
+  struct f_struct *work = *f_arg;
+  int n_rows;
+
+  if (work == NULL) return;
+
+  n_rows = (ppst->ext_sq_set) ? MAXLC : MAXUC;
+  free_weights(&work->weight0,&work->weight1,&work->weight_c,n_rows);
+  free(work->cur);
+
+  /* the sequence buffers are stored one element past their allocation
+     start, so step back before handing them to free() */
+#ifndef TFAST
+  work->aa0v -= 1;
+  free(work->aa0v);
+  work->aa0x -= 1;
+  free(work->aa0x);
+#else /* TFAST */
+  work->aa1x -= 1;
+  free(work->aa1x);
+  work->aa1v -= 1;
+  free(work->aa1v);
+#endif
+
+  free(work->res);
+  free(work->waa);
+  free(work->diag);
+  free(work->link);
+  free(work->pamh2);
+  free(work->pamh1);
+  free(work->harr);
+  free(work);
+  *f_arg = NULL;
+}
+
+/*
+ * do_fasta - hash-scan aa1 against the query tables and score the result.
+ *
+ *  aa0, n0   sequence whose positions were hashed by init_work()
+ *  aa1, n1   sequence that is scanned
+ *  ppst      search parameters
+ *  f_str     work area from init_work()
+ *  rst       out: score[1] = best single rescored run, score[0] = larger of
+ *            that and the sconn() joined score, score[2] = score[0] unless
+ *            the banded optimization runs, in which case it is dmatchx()'s
+ *            result
+ *  hoff      out: written only when the banded optimization runs
+ *
+ * All three scores are 0 when n1 < ktup or no run is saved, and -1 when
+ * n0+n1+1 >= MAXDIAG.
+ */
+void do_fasta (const unsigned char *aa0, int n0,
+               const unsigned char *aa1, int n1,
+               struct pstruct *ppst, struct f_struct *f_str,
+               struct rstruct *rst, int *hoff)
+{
+  int nd;		/* diagonal array size */
+  int lhval;
+  int kfact;
+  register struct dstruct *dptr;
+  register int tscor;
+
+#ifndef ALLOCN0
+  register struct dstruct *diagp;
+#else
+  register int dpos;
+  int lposn0;
+#endif
+  struct dstruct *dpmax;
+  register int lpos;
+  int tpos;
+  struct savestr *vmptr;
+  int scor;
+  int ib, nsave;
+  int ktup, kt1, *hsq, ip, lkt;
+#ifndef TFAST
+  int n0x31, n0x32;
+  n0x31 = (n0-2)/3;
+  n0x32 = n0x31+1+(n0-n0x31-1)/2;
+#else
+  int n1x31, n1x32;
+  n1x31 = (n1-2)/3;
+  n1x32 = n1x31+1+(n1-n1x31-1)/2;
+#endif
+
+  if (ppst->ext_sq_set) {
+    ip = 1;
+    hsq = ppst->hsqx;
+  }
+  else {
+    ip = 0;
+    hsq = ppst->hsq;
+  }
+
+  ktup = ppst->param_u.fa.ktup;
+  kt1 = ktup-1;
+
+  if (n1 < ktup) {
+    rst->score[0] = rst->score[1] = rst->score[2] = 0;
+    return;
+  }
+
+  if (n0+n1+1 >= MAXDIAG) {
+    fprintf(stderr,"n0,n1 too large: %d, %d\n",n0,n1);
+    rst->score[0] = rst->score[1] = rst->score[2] = -1;
+    return;
+  }
+
+  f_str->noff = n0 - 1;
+
+#ifdef ALLOCN0
+  nd = n0;
+#endif
+
+#ifndef ALLOCN0
+  nd = n0 + n1;
+#endif
+
+  /* reset the diagonals from ndo up to nd */
+  dpmax = &f_str->diag[nd];
+  for (dptr = &f_str->diag[f_str->ndo]; dptr < dpmax;)
+    {
+      dptr->stop = -1;
+      dptr->dmax = NULL;
+      dptr++->score = 0;
+    }
+
+  for (vmptr = f_str->vmax; vmptr < &f_str->vmax[MAXSAV]; vmptr++)
+    vmptr->score = 0;
+  f_str->lowmax = f_str->vmax;
+  f_str->lowscor = 0;
+
+  /* The former xdebug probe was removed: it read aa1[1400] and aa1[1401]
+     whenever n1 > 1000, and its only consumers were commented out. */
+
+  /* start hashing */
+  lhval = 0;
+  lkt = kt1;
+  /* lpos<n1 is tested first so aa1[n1] is never read by the condition */
+  for (lpos = 0; lpos<n1 && (lpos < lkt || hsq[aa1[lpos]]>=NMAP); lpos++) {
+    /* restart lhval calculation */
+    if (hsq[aa1[lpos]]>=NMAP) {
+      lhval = 0; lkt=lpos+ktup;
+      continue;
+    }
+    lhval = ((lhval & f_str->hmask) << f_str->kshft) + ppst->hsq[aa1[lpos]];
+  }
+
+  /* NOTE: the two preprocessor variants below open the same three brace
+     levels (lpos loop, tpos loop, "stop >= 0" test), which are closed by
+     the shared code that follows the #endif. */
+#ifndef ALLOCN0
+  diagp = &f_str->diag[f_str->noff + lkt];
+  for (; lpos < n1; lpos++, diagp++) {
+    if (hsq[aa1[lpos]]>=NMAP) {
+      lpos++ ; diagp++;
+      while (lpos < n1 && hsq[aa1[lpos]]>=NMAP) {lpos++; diagp++;}
+      if (lpos >= n1) break;
+      lhval = 0;
+    }
+    lhval = ((lhval & f_str->hmask) << f_str->kshft) + ppst->hsq[aa1[lpos]];
+    for (tpos = f_str->harr[lhval]; tpos >= 0; tpos = f_str->link[tpos]) {
+      if ((tscor = (dptr = &diagp[-tpos])->stop) >= 0) {
+#else
+  lposn0 = f_str->noff + lpos;
+  for (; lpos < n1; lpos++, lposn0++) {
+    if (hsq[aa1[lpos]]>=NMAP) {lhval = 0; goto loopl;}
+    lhval = ((lhval & f_str->hmask) << f_str->kshft) + ppst->hsq[aa1[lpos]];
+    for (tpos = f_str->harr[lhval]; tpos >= 0; tpos = f_str->link[tpos]) {
+      dpos = lposn0 - tpos;
+      if ((tscor = (dptr = &f_str->diag[dpos % nd])->stop) >= 0) {
+#endif
+	/* a run already exists on this diagonal */
+	tscor += ktup;
+	if ((tscor -= lpos) <= 0) {
+	  /* the new hit does not overlap the previous one */
+	  scor = dptr->score;
+	  if ((tscor += (kfact = f_str->pamh2[lhval])) < 0 && f_str->lowscor < scor)
+#ifdef ALLOCN0
+	    savemax (dptr, dpos, f_str);
+#else
+	    savemax (dptr, f_str);
+#endif
+	  if ((tscor += scor) >= kfact) {
+	    /* extending the old run is at least as good as restarting */
+	    dptr->score = tscor;
+	    dptr->stop = lpos;
+	  }
+	  else {
+	    dptr->score = kfact;
+	    dptr->start = (dptr->stop = lpos) - kt1;
+	  }
+	}
+	else {
+	  /* overlapping hit: add the single-residue score */
+	  dptr->score += f_str->pamh1[aa0[tpos]];
+	  dptr->stop = lpos;
+	}
+      }
+      else {
+	/* first hit on this diagonal */
+	dptr->score = f_str->pamh2[lhval];
+	dptr->start = (dptr->stop = lpos) - kt1;
+      }
+    }				/* end tpos */
+
+#ifdef ALLOCN0
+    /* reinitialize diag structure */
+  loopl:
+    if ((dptr = &f_str->diag[lpos % nd])->score > f_str->lowscor)
+      savemax (dptr, lpos, f_str);
+    dptr->stop = -1;
+    dptr->dmax = NULL;
+    dptr->score = 0;
+#endif
+  }				/* end lpos */
+
+#ifdef ALLOCN0
+  for (tpos = 0, dpos = f_str->noff + n1 - 1; tpos < n0; tpos++, dpos--) {
+    if ((dptr = &f_str->diag[dpos % nd])->score > f_str->lowscor)
+      savemax (dptr, dpos, f_str);
+  }
+#else
+  for (dptr = f_str->diag; dptr < dpmax;) {
+    if (dptr->score > f_str->lowscor) savemax (dptr, f_str);
+    dptr->stop = -1;
+    dptr->dmax = NULL;
+    dptr++->score = 0;
+  }
+  f_str->ndo = nd;
+#endif
+
+/*
+        at this point all of the elements of aa1[lpos]
+        have been searched for elements of aa0[tpos]
+        with the results in diag[dpos]
+*/
+
+  /* rescore every saved run with the full matrix */
+  for (nsave = 0, vmptr = f_str->vmax; vmptr < &f_str->vmax[MAXSAV]; vmptr++)
+    {
+      if (vmptr->score > 0) {
+	vmptr->score = spam (aa0, aa1, vmptr, ppst->pam2[0], f_str);
+	f_str->vptr[nsave++] = vmptr;
+      }
+    }
+
+  if (nsave <= 0) {
+    rst->score[0] = rst->score[1] = rst->score[2] = 0;
+    return;
+  }
+
+#ifndef TFAST
+  /* FASTX code here to modify the start, stop points for
+     the three phases of the translated protein sequence
+  */
+
+  for (ib=0; ib<nsave; ib++) {
+    if (f_str->noff-f_str->vptr[ib]->dp+f_str->vptr[ib]->start >= n0x32)
+      f_str->vptr[ib]->dp += n0x32;
+    if (f_str->noff-f_str->vptr[ib]->dp +f_str->vptr[ib]->start >= n0x31)
+      f_str->vptr[ib]->dp += n0x31;
+  }
+#else
+  /* TFAST code here to modify the start, stop points for
+     the three phases of the translated protein sequence
+     TFAST modifies library start points, rather than
+     query start points
+  */
+
+  for (ib=0; ib<nsave; ib++) {
+    if (f_str->vptr[ib]->start >= n1x32) {
+      f_str->vptr[ib]->start -= n1x32;
+      f_str->vptr[ib]->stop -= n1x32;
+      f_str->vptr[ib]->dp -= n1x32;
+    }
+    if (f_str->vptr[ib]->start >= n1x31) {
+      f_str->vptr[ib]->start -= n1x31;
+      f_str->vptr[ib]->stop -= n1x31;
+      f_str->vptr[ib]->dp -= n1x31;
+    }
+  }
+#endif /* TFAST */
+
+  scor = sconn (f_str->vptr, nsave, ppst->param_u.fa.cgap,
+		ppst->param_u.fa.pgap, f_str);
+
+  /* pick the single best run */
+  for (vmptr=f_str->vptr[0],ib=1; ib<nsave; ib++)
+    if (f_str->vptr[ib]->score > vmptr->score) vmptr=f_str->vptr[ib];
+
+/*  kssort (f_str->vptr, nsave); */
+
+  rst->score[1] = vmptr->score;
+  rst->score[0] = max (scor, vmptr->score);
+  rst->score[2] = rst->score[0];		/* initn */
+
+  /* banded optimization around the best run's diagonal */
+  if (ppst->param_u.fa.optflag) {
+    if (rst->score[0] > ppst->param_u.fa.optcut) {
+#ifndef TFAST
+      rst->score[2] = dmatchx(aa0, n0,aa1,n1,*hoff=f_str->noff - vmptr->dp,
+			     ppst->param_u.fa.optwid, ppst->pam2[0],
+			     ppst->gdelval,ppst->ggapval,ppst->gshift,f_str);
+#else /* TFAST */
+      rst->score[2] = dmatchx(aa0, n0, aa1, n1, *hoff=vmptr->dp-f_str->noff,
+			     ppst->param_u.fa.optwid, ppst->pam2[0],
+			     ppst->gdelval,ppst->ggapval,ppst->gshift,f_str);
+#endif /* TFAST */
+    }
+  }
+}
+
+/*
+ * do_work - score one library sequence against the prepared query.
+ *
+ * Sets rst->escore to 1.0 and rst->segnum/seglen to 1, then returns zero
+ * scores when n1 is shorter than ktup.  Otherwise:
+ *   without TFAST: runs do_fasta() with f_str->aa0x as the query;
+ *   with TFAST:    builds the codon codes (forward when frame is 0, reverse
+ *                  otherwise) and the three translations starting at
+ *                  frame*3 into f_str->aa1x, then runs do_fasta() on them.
+ * qr_flg is not used here.
+ */
+void do_work (const unsigned char *aa0, int n0,
+              const unsigned char *aa1, int n1,
+              int frame,
+              struct pstruct *ppst,
+              struct f_struct *f_str,
+              int qr_flg, struct rstruct *rst)
+{
+  int hoff;
+#ifdef TFAST
+  int used, fr, fr_end, seg_len;
+  unsigned char *xlat;
+#endif
+
+  rst->escore = 1.0;
+  rst->segnum = rst->seglen = 1;
+
+  if (n1 < ppst->param_u.fa.ktup) {
+    rst->score[0] = rst->score[1] = rst->score[2] = 0;
+    return;
+  }
+
+#ifndef TFAST
+  do_fasta (f_str->aa0x, n0, aa1, n1, ppst, f_str, rst, &hoff);
+#else
+  /* make a precomputed codon number series */
+  if (frame != 0) {
+    pre_com_r(aa1, n1, f_str->aa1v);
+  }
+  else {
+    pre_com(aa1, n1, f_str->aa1v);
+  }
+
+  /* make translated sequence: three translations laid end to end, each
+     followed by one separating byte */
+  xlat = f_str->aa1x;
+  used = 0;
+  fr_end = frame*3+3;
+  for (fr = frame*3; fr < fr_end; fr++) {
+    seg_len = saatran(aa1,&xlat[used],n1,fr);
+    used += seg_len+1;
+  }
+
+  /* the last separator is not counted in the length */
+  do_fasta (aa0, n0, f_str->aa1x, used-1, ppst, f_str, rst, &hoff);
+#endif
+}
+
+/*
+ * do_opt - run do_fasta() with the optimization flag forced on.
+ *
+ * ppst->param_u.fa.optflag is set to 1 for the duration of the call and
+ * restored afterwards.  Without TFAST the query passed on is f_str->aa0x;
+ * with TFAST aa0 and aa1 are passed through unchanged.  frame is not used.
+ */
+void do_opt (const unsigned char *aa0, int n0,
+             const unsigned char *aa1, int n1,
+             int frame,
+             struct pstruct *ppst,
+             struct f_struct *f_str,
+             struct rstruct *rst)
+{
+  int hoff;
+  const int saved_optflag = ppst->param_u.fa.optflag;
+
+  ppst->param_u.fa.optflag = 1;
+
+#ifdef TFAST
+  do_fasta (aa0, n0, aa1, n1, ppst, f_str, rst, &hoff);
+#else
+  do_fasta (f_str->aa0x, n0, aa1, n1, ppst, f_str, rst, &hoff);
+#endif
+
+  ppst->param_u.fa.optflag = saved_optflag;
+}
+
+/* savemax - record the run held in *dptr in the f_str->vmax[] table.
+ With ALLOCN0 the caller passes the diagonal number dpos; otherwise it is
+ computed from dptr's offset in f_str->diag. On return f_str->lowmax points
+ at the lowest-scoring vmax[] entry and f_str->lowscor holds its score.
+ The function uses an old-style (K&R) parameter list. */
+#ifdef ALLOCN0
+void
+savemax (dptr, dpos, f_str)
+ register struct dstruct *dptr;
+ int dpos;
+ struct f_struct *f_str;
+{
+ register struct savestr *vmptr;
+ register int i;
+
+#else
+void
+savemax (dptr, f_str)
+ register struct dstruct *dptr;
+ struct f_struct *f_str;
+{
+ register int dpos;
+ register struct savestr *vmptr;
+ register int i;
+
+ dpos = (int) (dptr - f_str->diag);
+
+#endif
+
+/* check to see if this is the continuation of a run that is already saved */
+
+ if ((vmptr = dptr->dmax) != NULL && vmptr->dp == dpos &&
+ vmptr->start == dptr->start)
+ {
+ /* same run: extend its end, and raise its score if it improved */
+ vmptr->stop = dptr->stop;
+ if ((i = dptr->score) <= vmptr->score)
+ return;
+ vmptr->score = i;
+ /* the minimum only needs recomputing if the updated entry was the minimum */
+ if (vmptr != f_str->lowmax)
+ return;
+ }
+ else
+ {
+ /* new run: overwrite the current lowest-scoring entry */
+ i = f_str->lowmax->score = dptr->score;
+ f_str->lowmax->dp = dpos;
+ f_str->lowmax->start = dptr->start;
+ f_str->lowmax->stop = dptr->stop;
+ dptr->dmax = f_str->lowmax;
+ }
+
+ /* rescan vmax[] for the new lowest score, starting from the score just stored */
+ for (vmptr = f_str->vmax; vmptr < &f_str->vmax[MAXSAV]; vmptr++)
+ if (vmptr->score < i)
+ {
+ i = vmptr->score;
+ f_str->lowmax = vmptr;
+ }
+ f_str->lowscor = i;
+}
+
+/*
+ * spam - rescan a saved run with the full scoring matrix and keep its
+ * best-scoring contiguous segment.
+ *
+ *  aa0, aa1  the two sequences
+ *  dmax      in: run to rescan (start, stop, dp); out: start/stop are
+ *            replaced by the bounds of the best segment
+ *  pam2      scoring matrix, indexed pam2[aa0 residue][aa1 residue]
+ *  f_str     only noff is read, to turn an aa1 position and a diagonal
+ *            into an aa0 position
+ *
+ * Returns the score of the best segment.  If no position scores above
+ * zero the return value is 0 and start/stop both become the original
+ * start; previously they were copied from uninitialized locals.
+ */
+int spam (const unsigned char *aa0,
+          const unsigned char *aa1,
+          struct savestr *dmax, int **pam2,
+          struct f_struct *f_str)
+{
+  int lpos;
+  int tot;
+  struct {
+    int start, stop, score;
+  } curv, maxv;
+  const unsigned char *aa0p, *aa1p;
+
+  aa1p = &aa1[lpos = dmax->start];
+  aa0p = &aa0[lpos - dmax->dp + f_str->noff];
+
+  /* give every field a defined value before the scan */
+  curv.start = curv.stop = lpos;
+  maxv.start = maxv.stop = lpos;
+
+  tot = curv.score = maxv.score = 0;
+  for (; lpos <= dmax->stop; lpos++) {
+    tot += pam2[*aa0p++][*aa1p++];
+    if (tot > curv.score) {
+      /* new best end point for the current segment */
+      curv.stop = lpos;
+      curv.score = tot;
+    }
+    else if (tot < 0) {
+      /* the running total went negative: close the current segment and
+	 start a new one at the next position */
+      if (curv.score > maxv.score) {
+	maxv.start = curv.start;
+	maxv.stop = curv.stop;
+	maxv.score = curv.score;
+      }
+      tot = curv.score = 0;
+      curv.start = lpos+1;
+    }
+  }
+
+  if (curv.score > maxv.score) {
+    maxv.start = curv.start;
+    maxv.stop = curv.stop;
+    maxv.score = curv.score;
+  }
+
+  dmax->start = maxv.start;
+  dmax->stop = maxv.stop;
+
+  return maxv.score;
+}
+
+/* slack allowed when testing whether one run ends before another starts */
+#define XFACT 10
+
+/* sconn - join saved runs into a chain and return the best chained score.
+ v[0..n-1] are the runs (sorted in place by start via kpsort()); runs
+ scoring below cgap are ignored; pgap is added to the score each time a
+ run is joined to an earlier one; f_str supplies noff for converting a
+ library position and diagonal into the other sequence's position.
+ Returns the score at the head of the list, or 0 if no run qualified. */
+int sconn (struct savestr **v, int n,
+ int cgap, int pgap, struct f_struct *f_str)
+{
+ int i, si;
+ /* list of accepted runs; the insertion code below keeps the
+ highest-scoring entry at the head */
+ struct slink {
+ int score;
+ struct savestr *vp;
+ struct slink *next;
+ } *start, *sl, *sj, *so, sarr[MAXSAV];
+ int lstart, tstart, plstop, ptstop;
+
+/* sort the score left to right in lib pos */
+
+ kpsort (v, n);
+
+ start = NULL;
+
+/* for the remaining runs, see if they fit */
+
+ for (i = 0, si = 0; i < n; i++)
+ {
+
+/* if the score is less than the gap penalty, it never helps */
+ if (v[i]->score < cgap)
+ continue;
+ lstart = v[i]->start;
+ tstart = lstart - v[i]->dp + f_str->noff;
+
+/* put the run in the group */
+ sarr[si].vp = v[i];
+ sarr[si].score = v[i]->score;
+ sarr[si].next = NULL;
+
+/* if it fits, then increase the score */
+ /* the list is walked from its head, so the first earlier run that ends
+ before this one starts (within XFACT) in both sequences is used */
+ for (sl = start; sl != NULL; sl = sl->next)
+ {
+ plstop = sl->vp->stop;
+ ptstop = plstop - sl->vp->dp + f_str->noff;
+ if (plstop < lstart+XFACT && ptstop < tstart+XFACT) {
+ sarr[si].score = sl->score + v[i]->score + pgap;
+ break;
+ }
+ }
+
+/* now recalculate where the score fits */
+ /* NOTE(review): an entry whose score is not greater than any entry already
+ in the list is not linked in at all (there is no append at the tail), so
+ later runs cannot chain onto it - confirm this is intended */
+ if (start == NULL)
+ start = &sarr[si];
+ else
+ for (sj = start, so = NULL; sj != NULL; sj = sj->next)
+ {
+ if (sarr[si].score > sj->score)
+ {
+ sarr[si].next = sj;
+ if (so != NULL)
+ so->next = &sarr[si];
+ else
+ start = &sarr[si];
+ break;
+ }
+ so = sj;
+ }
+ si++;
+ }
+
+ if (start != NULL)
+ return (start->score);
+ else
+ return (0);
+}
+
+/* kssort - shell sort of the pointers v[0..n-1] by descending score */
+void
+kssort (struct savestr *v[], int n)
+{
+  int step, hi, lo;
+  struct savestr *held;
+
+  for (step = n / 2; step > 0; step /= 2) {
+    for (hi = step; hi < n; hi++) {
+      /* sink v[hi] towards the front while its score is the larger one */
+      lo = hi - step;
+      while (lo >= 0 && v[lo]->score < v[lo + step]->score) {
+        held = v[lo];
+        v[lo] = v[lo + step];
+        v[lo + step] = held;
+        lo -= step;
+      }
+    }
+  }
+}
+
+/* kpsort - shell sort of the pointers v[0..n-1] by ascending start */
+void
+kpsort (struct savestr *v[], int n)
+{
+  int step, hi, lo;
+  struct savestr *held;
+
+  for (step = n / 2; step > 0; step /= 2) {
+    for (hi = step; hi < n; hi++) {
+      /* move v[hi] towards the front while it starts earlier */
+      lo = hi - step;
+      while (lo >= 0 && v[lo]->start > v[lo + step]->start) {
+        held = v[lo];
+        v[lo] = v[lo + step];
+        v[lo + step] = held;
+        lo -= step;
+      }
+    }
+  }
+}
+
+/*
+ * dmatchx - run lx_band() on a band of the given window whose start
+ * diagonal is hoff - window/2.
+ *
+ * Without TFAST the band is computed for aa1 against f_str->aa0v (length
+ * n0-2); with TFAST for aa0 against f_str->aa1v (length n1-2).  The gap and
+ * frame-shift values are negated before being handed to lx_band(); with
+ * OLD_FASTA_GAP the gap-open value is -(gdelval - ggapval).
+ */
+static int
+dmatchx(const unsigned char *aa0, int n0,
+        const unsigned char *aa1, int n1,
+        int hoff, int window,
+        int **pam2, int gdelval, int ggapval, int gshift,
+        struct f_struct *f_str)
+{
+  int gap_open;
+
+#ifdef OLD_FASTA_GAP
+  gap_open = -(gdelval - ggapval);
+#else
+  gap_open = -gdelval;
+#endif
+
+  hoff -= window/2;
+
+#ifndef TFAST
+  return lx_band(aa1, n1, f_str->aa0v, n0-2,
+                 pam2, gap_open, -ggapval, -gshift,
+                 hoff, window, f_str);
+#else
+  return lx_band(aa0, n0, f_str->aa1v, n1-2,
+                 pam2, gap_open, -ggapval, -gshift,
+                 hoff, window, f_str);
+#endif
+}
+
+/* init_row - zero every field of the first sp cells of row */
+static void
+init_row(struct sx_s *row, int sp) {
+  int k = 0;
+
+  while (k < sp) {
+    struct sx_s *cell = &row[k++];
+
+    cell->C1 = cell->C2 = cell->C3 = 0;
+    cell->I1 = cell->I2 = cell->I3 = 0;
+    cell->flag = 0;
+  }
+}
+
+int lx_band(const unsigned char *prot_seq, /* array with protein sequence numbers*/
+ int len_prot, /* length of prot. seq */
+ const unsigned char *dna_prot_seq, /* translated DNA sequence numbers*/
+ int len_dna_prot, /* length trans. seq. */
+ int **pam_matrix, /* scoring matrix */
+ int gopen, int gext, /* gap open, gap extend penalties */
+ int gshift, /* frame-shift penalty */
+ int start_diag, /* start diagonal of band */
+ int width, /* width for band alignment */
+ struct f_struct *f_str)
+{ /* lx_band: score-only dynamic programming restricted to a band.
+   * One row of band cells (f_str->cur) is updated in place for each
+   * prot_seq position i.  Every cell holds three score/insert pairs
+   * (C1/I1, C2/I2, C3/I3), one for each of three consecutive
+   * dna_prot_seq entries.  The per-residue weights are taken from
+   * f_str->weight0[prot_seq[i]]; pam_matrix and gshift are not referenced
+   * in this body.  Returns best+gg, where best is the largest positive
+   * value of (cell score - gg) recorded while filling the band. */
+ void *ckalloc();
+ int i, j, bd, bd1, x1, x2, sp, p1=0, p2=0, end_prot;
+ struct sx_s *last, *tmp;
+ int sc, del, best = 0, cd,ci, e1, e2, e3, cd1, cd2, cd3, f, gg;
+ const unsigned char *dp;
+ register struct sx_s *ap, *aq;
+ struct wgt *wt, *ww;
+ int aa, b, a,x,y,z;
+
+ sp = width+7; /* number of band cells initialised in f_str->cur */
+ gg = gopen+gext; /* subtracted from a cell score before it seeds a gap */
+ /* sp = sp/3+1; */
+
+ if (f_str->cur == NULL) { /* NOTE(review): allocated once, sized by the width of the first call - confirm width never grows */
+ f_str->cur = (struct sx_s *) ckalloc(sizeof(struct sx_s)*sp);
+ }
+
+ init_row(f_str->cur, sp);
+
+ /*
+ if (start_diag %3 !=0) start_diag = start_diag/3-1;
+ else start_diag = start_diag/3;
+ if (width % 3 != 0) width = width/3+1;
+ else width = width /3;
+ */
+
+ x1 = start_diag; /* x1 = lower bound of DNA */
+ x2 = 1; /* the amount of position shift from last row*/
+
+ end_prot = max(0,-width-start_diag) + (len_dna_prot+5)/3 + width; /* NOTE(review): end_prot is computed here but the row loop below is bounded by len_prot - confirm intent */
+ end_prot = min(end_prot,len_prot);
+
+ /* i counts through protein sequence, x1 through DNAp */
+
+ for (i = max(0, -width-start_diag), x1+=i; i < len_prot; i++, x1++) {
+ bd = min(x1+width, (len_dna_prot+2)/3); /* upper bound of band */
+ bd1 = max(0,x1); /* lower bound of band */
+ wt = f_str->weight0[prot_seq[i]];
+ del = 1-x1; /*adjustment*/
+ bd += del; /* bd and bd1 now index f_str->cur instead of the DNA */
+ bd1 +=del;
+
+ ap = &f_str->cur[bd1]; aq = ap+1; /* aq always points one cell ahead of ap */
+ e1 = f_str->cur[bd1-1].C3; e2 = ap->C1; cd1 = cd2= cd3= 0; /* cd1..cd3: scores carried along the row, one per frame */
+ for (dp = &dna_prot_seq[(bd1-del)*3]; ap < &f_str->cur[bd]; ap++) { /* three dna_prot_seq entries are consumed per cell */
+ ww = &wt[(unsigned char) *dp++]; /* ---- first of the three positions: updates C1/I1 ---- */
+ sc = max(max(e1+ww->iv, (e3=ap->C2)+ww->ii), e2+ww->iii);
+ if (cd1 > sc) sc = cd1;
+ cd1 -= gext; /* the carried score decays by gext per cell */
+ if ((ci = aq->I1) > 0) { /* ci comes from the cell ahead, not yet rewritten for this row */
+ if (sc < ci) { ap->C1 = ci; ap->I1 = ci-gext;}
+ else {
+ ap->C1 = sc;
+ sc -= gg;
+ if (sc > 0) {
+ if (sc > best) best =sc; /* best tracks score minus gg; gg is added back on return */
+ if (cd1 < sc) cd1 = sc;
+ ap->I1 = max(ci-gext, sc);
+ } else ap->I1 = ci-gext;
+ }
+ } else {
+ if (sc <= 0) {
+ ap->I1 = ap->C1 = 0; /* scores are floored at 0 */
+ } else {
+ ap->C1 = sc; sc-=gg;
+ if (sc >0) {
+ if (sc > best) best =sc;
+ if (cd1 < sc) cd1 = sc;
+ ap->I1 = sc;
+ } else ap->I1 = 0;
+ }
+ }
+ ww = &wt[(unsigned char) *dp++]; /* ---- second position: updates C2/I2 ---- */
+ sc = max(max(e2+ww->iv, (e1=ap->C3)+ww->ii), e3+ww->iii);
+ if (cd2 > sc) sc = cd2;
+ cd2 -= gext;
+ if ((ci = aq->I2) > 0) {
+ if (sc < ci) { ap->C2 = ci; ap->I2 = ci-gext;}
+ else {
+ ap->C2 = sc;
+ sc -= gg;
+ if (sc > 0) {
+ if (sc > best) best =sc;
+ if (cd2 < sc) cd2 = sc;
+ ap->I2 = max(ci-gext, sc);
+ } /* NOTE(review): unlike the C1/I1 branch there is no "else ap->I2 = ci-gext", so I2 keeps its old value when sc-gg <= 0 - confirm intended */
+ }
+ } else {
+ if (sc <= 0) {
+ ap->I2 = ap->C2 = 0;
+ } else {
+ ap->C2 = sc; sc-=gg;
+ if (sc >0) {
+ if (sc > best) best =sc;
+ if (cd2 < sc) cd2 = sc;
+ ap->I2 = sc;
+ } else ap->I2 = 0;
+ }
+ }
+ ww = &wt[(unsigned char)*dp++]; /* ---- third position: updates C3/I3 ---- */
+ sc = max(max(e3+ww->iv, (e2=aq->C1)+ww->ii), e1+ww->iii);
+ if (cd3 > sc) sc = cd3;
+ cd3 -= gext;
+ if ((ci = aq++->I3) > 0) { /* aq advances here, once per cell */
+ if (sc < ci) { ap->C3 = ci; ap->I3 = ci-gext;}
+ else {
+ ap->C3 = sc;
+ sc -= gg;
+ if (sc > 0) {
+ if (sc > best) best =sc;
+ if (cd3 < sc) cd3 = sc;
+ ap->I3 = max(ci-gext, sc);
+ } /* NOTE(review): same missing "else ap->I3 = ci-gext" as in the I2 branch - confirm intended */
+ }
+ } else {
+ if (sc <= 0) {
+ ap->I3 = ap->C3 = 0;
+ } else {
+ ap->C3 = sc; sc-=gg;
+ if (sc >0) {
+ if (sc > best) best =sc;
+ if (cd3 < sc) cd3 = sc;
+ ap->I3 = sc;
+ } else ap->I3 = 0;
+ }
+ }
+ }
+ }
+ /* printf("The best score is %d\n", best); */
+ return best+gg; /* undo the gg that was subtracted before best was recorded */
+}
+
+/* ckalloc - malloc() wrapper; w_abort() is called when malloc() returns NULL */
+void *ckalloc(size_t amount)
+{
+  void *mem = malloc(amount);
+
+  if (mem == NULL) {
+    w_abort("Ran out of memory.","");
+  }
+  return mem;
+}
+
+/* shscore - score of aa0[0..n0-1] aligned to itself: the sum of the
+   diagonal entries pam2[r][r] over its residues (0 when n0 <= 0) */
+int
+shscore(unsigned char *aa0, int n0, int **pam2)
+{
+  int pos = 0, total = 0;
+
+  while (pos < n0) {
+    unsigned char res = aa0[pos++];
+    total += pam2[res][res];
+  }
+  return total;
+}
+
+#define SGW1 100 /* global() hands a region to small_global() only when ex-x < SGW1 ... */
+#define SGW2 300 /* ... and ey-y < SGW2; both also size small_global()'s static tables */
+#define WIDTH 60 /* columns printed per output line by display_alig() */
+
+typedef struct mat *match_ptr;
+
+typedef struct mat { /* one edge of an alignment path, kept in a singly linked list */
+ int i, j, l; /* i, j: end position of the edge; l: edge type (0, 2, 3, 4 or 5) */
+ match_ptr next;
+} match_node;
+
+typedef struct { int i,j;} state; /* a (protein, DNA) position pair; local_align() uses it to remember alignment starts */
+typedef state *state_ptr;
+
+
+void *ckalloc();
+static match_ptr small_global(), global();
+static int local_align(), find_best();
+static void init_row2(), init_ROW();
+
+/*
+ * pro_dna - align prot_seq with dna_prot_seq and store the edit script.
+ *
+ * local_align() finds the best local alignment and its boundaries
+ * (x, y = start and ex, ey = end in prot_seq / dna_prot_seq); global()
+ * then rebuilds the path between those boundaries.  The boundaries are
+ * stored in a_res->min0/min1/max0/max1 and the edge codes (match_node.l)
+ * of the path are copied into a_res->res[].
+ *
+ * At most max_res codes are stored.  When the path is longer a warning is
+ * written to stderr and a_res->nres is set to max_res, so that readers of
+ * a_res->res[0..nres-1] never go past the codes that were stored.
+ *
+ * The three work rows f_str->up/down/tp are allocated here, advanced by 3
+ * elements after local_align() (global_down() writes up to 3 elements
+ * below index 0) and released before returning.
+ *
+ * gshift is accepted but not used in this body.
+ * Returns the score reported by local_align().
+ */
+int
+pro_dna(const unsigned char *prot_seq,     /* array with prot. seq. numbers*/
+        int len_prot,                      /* length of prot. seq */
+        const unsigned char *dna_prot_seq, /* trans. DNA seq. numbers*/
+        int len_dna_prot,                  /* length trans. seq. */
+        int **pam_matrix,                  /* scoring matrix */
+        int gopen, int gext,               /* gap open, gap extend penalties */
+        int gshift,                        /* frame-shift penalty */
+        struct f_struct *f_str,
+        int max_res,
+        struct a_res_str *a_res)           /* alignment info */
+{
+  match_ptr align, ap, aq;
+  int x, y, ex, ey, i, score;
+  int *alignment;
+
+  f_str->up = (st_ptr) ckalloc(sizeof(struct st_s)*(len_dna_prot+10));
+  f_str->down = (st_ptr) ckalloc(sizeof(struct st_s)*(len_dna_prot+10));
+  f_str->tp = (st_ptr) ckalloc(sizeof(struct st_s)*(len_dna_prot+10));
+
+  /* local alignment: x and y are the starting position of the best
+     local alignment, ex and ey its ending position */
+  score = local_align(&x, &y, &ex, &ey,
+                      pam_matrix, gopen, gext,
+                      dna_prot_seq, len_dna_prot,
+                      prot_seq, len_prot, f_str);
+
+  /* leave 3 spare elements in front of each row for the global passes */
+  f_str->up += 3; f_str->down += 3; f_str->tp += 3;
+
+  /* x, y - start in prot, dna_prot */
+  a_res->min0 = x;   /* prot */
+  a_res->min1 = y;   /* DNA */
+  a_res->max0 = ex;  /* prot */
+  a_res->max1 = ey;  /* DNA */
+
+  align = global(x, y, ex, ey,
+                 pam_matrix, gopen, gext,
+                 dna_prot_seq, prot_seq,
+                 0, 0, f_str);
+
+  alignment = a_res->res;
+
+  /* copy the edge codes and release the list; i counts every edge, even
+     those that no longer fit */
+  for (ap = align, i = 0; ap; i++) {
+    if (i < max_res) alignment[i] = ap->l;
+    aq = ap->next; free(ap); ap = aq;
+  }
+
+  /* only a path longer than max_res has actually lost edges */
+  if (i > max_res) {
+    fprintf(stderr,"***alignment truncated: %d/%d***\n", max_res,i);
+    i = max_res;
+  }
+
+  /* undo the +3 offset before handing the rows back to free() */
+  free(&f_str->up[-3]); free(&f_str->tp[-3]); free(&f_str->down[-3]);
+
+  a_res->nres = i;
+  return score;
+}
+
+/* swap - exchange the two pointers stored at *a and *b */
+static void
+swap(void **a, void **b)
+{
+  void *held = *b;
+
+  *b = *a;
+  *a = held;
+}
+
+/*
+ * local_align - find the best-scoring local alignment of pro[0..lp-1]
+ * with dnap[0..ld-1] and report where it starts and ends.
+ *
+ *   *x,  *y   start of the best alignment (protein, DNA)
+ *   *ex, *ey  end of the best alignment (protein, DNA)
+ *
+ * All four are set to 0 when no cell scores above 0.  Returns the best
+ * score (0 in that case).
+ *
+ * Two rows of scores (f_str->up, f_str->down) are kept and swapped after
+ * every protein residue.  Next to the scores the routine tracks, for every
+ * column, the start position of the alignment that produced the score:
+ * cur_st/last_st for the C scores, cur_i_st for the I scores, and cur_d_st
+ * (a column offset) for the D scores.
+ *
+ * Weights are taken from f_str->weight1[]; wgts is not referenced here.
+ */
+static int
+local_align(int *x, int *y, int *ex, int *ey,
+            int **wgts, int gop, int gext,
+            const unsigned char *dnap, int ld,
+            const unsigned char *pro, int lp,
+            struct f_struct *f_str)
+{
+  /* x1..x4 start at 0 so that the outputs are defined when nothing scores */
+  int i, j, x1 = 0, x2 = 0, x3 = 0, x4 = 0, e1 = 0, e2 = 0, e3,
+    sc, del, e, best = 0, cd, ci, c;
+  struct wgt *wt, *ww;
+  state_ptr cur_st, last_st, cur_i_st;
+  st_ptr cur, last;
+  int *cur_d_st, *st_up;
+
+  st_up = (int *) ckalloc(sizeof(int)*(ld+10));
+  init_row2(st_up, ld+5);
+
+  ld += 2;   /* columns 0 and 1 are lead-in; DNA position k is column k+2 */
+
+  init_ROW(f_str->up, ld+1);
+  init_ROW(f_str->down, ld+1);
+  cur = f_str->up+1;
+  last = f_str->down+1;
+
+  cur_st = (state_ptr) ckalloc(sizeof(state)*(ld+1));
+  last_st = (state_ptr) ckalloc(sizeof(state)*(ld+1));
+  cur_i_st = (state_ptr) ckalloc(sizeof(state)*(ld+1));
+  cur_d_st = st_up;
+
+  for (i = 0; i < lp; i++) {
+    wt = f_str->weight1[pro[i]]; e2 = 0; e1 = last[0].C;
+    for (j = 0; j < 2; j++) {
+      cur_st[j].i = i+1;
+      cur_st[j].j = j+1;
+    }
+    for (j = 2; j < ld; j++) {
+      /* column j reads dnap[j-2]; indexing directly avoids forming a
+         pointer in front of the array */
+      ww = &wt[(unsigned char) dnap[j-2]];
+      del = -1;   /* -1: no edge chosen, an alignment would start here */
+      if (j >= 3) {
+        sc = 0;
+        e3 = e2; e2 = e1;
+        e1 = last[j-2].C;
+        if ((e = e2+ww->iii) > sc) {sc = e; del = 3;}
+        if ((e = e1+ww->ii) > sc) {sc = e; del = 2;}
+        if ((e = e3+ww->iv) > sc) {sc = e; del = 4;}
+      } else {
+        sc = e2 = 0;
+        if (ww->iii > 0) {sc = ww->iii; del = 3;}
+      }
+      if (sc < (ci = last[j].I)) {
+        sc = ci; del = 0;
+      }
+      if (sc < (cd = cur[j].D)) {
+        sc = cd; del = 5;
+      }
+      cur[j].C = sc;
+
+      /* the D score three columns ahead: open a new gap or extend */
+      e = sc - gop;
+      if (e > cd) {
+        cur[j+3].D = e-gext;
+        cur_d_st[j+3] = 3;
+      } else {
+        cur[j+3].D = cd-gext;
+        cur_d_st[j+3] = cur_d_st[j]+3;
+      }
+
+      /* remember where the alignment ending in this cell started */
+      switch(del) {
+      case 5:
+        c = cur_d_st[j];
+        cur_st[j].i = cur_st[j-c].i;
+        cur_st[j].j = cur_st[j-c].j;
+        break;
+      case 0:
+        cur_st[j].i = cur_i_st[j].i;
+        cur_st[j].j = cur_i_st[j].j;
+        break;
+      case 2:
+      case 3:
+      case 4:
+        if (i) {
+          if (j-del >= 0) {
+            cur_st[j].i = last_st[j-del].i;
+            cur_st[j].j = last_st[j-del].j;
+          } else {
+            cur_st[j].i = i;
+            cur_st[j].j = 0;
+          }
+        } else {
+          cur_st[j].i = 0;
+          cur_st[j].j = max(0, j-del+1);
+        }
+        break;
+      case -1:
+        cur_st[j].i = i+1;
+        cur_st[j].j = j+1;
+        break;
+      }
+
+      /* the I score of this column: open a new gap or extend */
+      if (e > ci) {
+        cur[j].I = e -gext;
+        cur_i_st[j].i = cur_st[j].i;
+        cur_i_st[j].j = cur_st[j].j;
+      } else {
+        cur[j].I = ci- gext;
+      }
+
+      if (sc > best) {
+        x1 = cur_st[j].i;
+        x2 = cur_st[j].j;
+        best = sc;
+        x3 = i;
+        x4 = j;
+      }
+    }
+    swap((void *)&last, (void *)&cur);
+    swap((void *)&cur_st, (void *)&last_st);
+  }
+  /* printf("The best score is %d\n", best);*/
+  *x = x1; *y = x2; *ex = x3; *ey = x4;
+  free(cur_st); free(last_st); free(cur_i_st);
+  free(st_up);
+  return best;
+}
+
+/*
+ Both global_up and global_down perform linear-space, score-only global
+ alignments on the subsequences pro[x]...pro[ex] and dna[y]...dna[ey].
+ global_up runs the algorithm upwards, from row x towards row ex.
+ global_down runs the algorithm downwards, from row ex towards row x.
+*/
+
+static void /*
+ * global_up: score-only pass over protein rows x..ex (pro[i+x-1] for
+ * i = 1..ex-x+1) and columns j = 0..ey-y+1.  Two row buffers (*row1 and
+ * *row2) are used and exchanged after every row.  On return *row1 points
+ * at the buffer holding the C and D values of the final row; its I values
+ * are overwritten with those of the other buffer (see the end of the body).
+ * Weights come from f_str->weight1[]; wgts is not referenced here.
+ * N non-zero sets the I score of column 0 in the starting row to -gext.
+ */
+global_up(st_ptr *row1, st_ptr *row2,
+ int x, int y, int ex, int ey,
+ int **wgts, int gop, int gext,
+ unsigned char *dnap, unsigned char *pro,
+ int N, struct f_struct *f_str)
+{
+ int i, j, k, sc, e, e1, e2, e3, t, ci, cd, score;
+ struct wgt *wt, *ww;
+ st_ptr cur, last;
+
+ cur = *row1; last = *row2;
+ sc = -gop;
+ for (j = 0; j <= ey-y+1; j++) { /* starting row: only columns that are multiples of 3 get a gap score */
+ if (j % 3 == 0) {last[j].C = sc; sc -= gext; last[j].I = sc-gop;}
+ else { last[j].I = last[j].C = -10000;} /* -10000 marks a cell that cannot be reached */
+ }
+ last[0].C = 0; cur[0].D = cur[1].D = cur[2].D = -10000;
+ last[0].D = last[1].D = last[2].D = -10000;
+ if (N) last[0].I = -gext;
+ for (i = 1; i <= ex-x+1; i++) {
+ wt = f_str->weight1[pro[i+x-1]]; e1 = -10000; e2 = last[0].C;
+ for (j = 0; j <= ey-y+1; j++) {
+ t = j+y;
+ sc = -10000;
+ ww = &wt[(unsigned char) dnap[t-3]]; /* NOTE(review): t-3 is negative while j+y < 3 - presumably dnap has leading padding; confirm against the caller */
+ if (j < 4) { /* first columns: only a step from column 0 is possible */
+ if (j == 3) {
+ sc = e2+ww->iii;
+ } else if (j == 2) {
+ sc = e2 + ww->ii;
+ }
+ } else {
+ e3 = e2; e2 = e1; /* slide the window of previous-row C values */
+ e1 = last[j-2].C;
+ sc = max(e2+ww->iii, max(e1+ww->ii, e3+ww->iv));
+ }
+ sc = max(sc, max(ci=last[j].I, cd = cur[j].D));
+ cur[j].C = sc;
+ cur[j+3].D = max(cd, sc-gop)-gext; /* D is written three columns ahead in the same row */
+ cur[j].I = max(ci, sc-gop)-gext;
+ }
+ swap((void *)&last, (void *)&cur); /* the row just filled becomes "last" */
+ }
+ /*printf("global up score =%d\n", last[ey-y+1].C);*/
+ for (i = 0; i <= ey-y+1; i++) last[i].I = cur[i].I; /* copies the I values of the other buffer into the final row */
+ if (*row1 != last) swap((void *)row1, (void *)row2); /* make *row1 refer to the final row */
+}
+
+static void /*
+ * global_down: score-only pass in the reverse direction, over protein
+ * rows ex down to x (pro[i+x] for i = ex-x..0) and columns j = ey-y+1
+ * down to 0.  Two row buffers (*row1 and *row2) are used and exchanged
+ * after every row.  On return *row1 points at the buffer holding the C and
+ * D values of the final row; its I values are overwritten with those of
+ * the other buffer (see the end of the body).
+ * Weights come from f_str->weight1[]; wgts is not referenced here.
+ * N non-zero sets the I score of column ey-y+1 in the starting row to -gext.
+ */
+global_down(st_ptr *row1, st_ptr *row2,
+ int x, int y, int ex, int ey,
+ int **wgts, int gop, int gext,
+ unsigned char *dnap, unsigned char *pro,
+ int N, struct f_struct *f_str)
+{
+ int i, j, k, sc, del, *tmp, e, t, e1,e2,e3, ci,cd, score;
+ struct wgt *wt, *w1, *w2, *w3;
+ st_ptr cur, last;
+
+ cur = (*row1); last = *row2;
+ sc = -gop;
+ for (j = ey-y+1; j >= 0; j--) {
+ if ((ey-y+1-j) % 3) {last[j].C = sc; sc-=gext; last[j].I = sc-gop;} /* NOTE(review): gap scores go to distances that are NOT multiples of 3, the opposite of global_up's "j % 3 == 0" - confirm intended */
+ else last[j].I = last[j].C = -10000; /* -10000 marks a cell that cannot be reached */
+ cur[j].I = -10000;
+ }
+ last[ey-y+1].C = 0;
+ if (N) last[ey-y+1].I = -gext;
+ cur[ey-y+1].D = cur[ey-y].D = cur[ey-y-1].D = -10000;
+ last[ey-y+1].D = last[ey-y].D = last[ey-y-1].D = -10000;
+ for (i = ex-x; i >= 0; i--) {
+ wt = f_str->weight1[pro[i+x]]; e2 = last[ey-y+1].C;
+ e1 = -10000;
+ w3 = &wt[(unsigned char) dnap[ey]]; /* w1/w2/w3: weights of three neighbouring DNA entries, shifted at the end of every column */
+ w2 = &wt[(unsigned char) dnap[ey-1]];
+ for (j = ey-y+1; j >= 0; j--) {
+ t = j+y;
+ w1 = &wt[(unsigned char) dnap[t-1]]; /* NOTE(review): t-1 is -1 when j+y == 0 - presumably dnap has leading padding; confirm against the caller */
+ sc = -10000;
+ if (t+3 > ey) { /* last columns: only a step from column ey-y+1 is possible */
+ if (t+2 == ey) {
+ sc = e2+w2->iii;
+ } else if (t+1 == ey) {
+ sc = e2+w1->ii;
+ }
+ } else {
+ e3 = e2; e2 = e1; /* slide the window of previous-row C values */
+ e1 = last[j+2].C;
+ sc = max(e2+w2->iii, max(e1+w1->ii,e3+w3->iv)) ;
+ }
+ if (sc < (cd= cur[j].D)) {
+ sc = cd;
+ cur[j-3].D = cd-gext; /* D is written three columns below; j-3 reaches -3, which is why pro_dna() offsets the rows by 3 */
+ } else cur[j-3].D =max(cd, sc-gop)-gext;
+ if (sc < (ci= last[j].I)) {
+ sc = ci;
+ cur[j].I = ci - gext;
+ } else cur[j].I = max(sc-gop,ci)-gext;
+ cur[j].C = sc;
+ w3 = w2; w2 = w1;
+ }
+ swap((void *)&last, (void *)&cur); /* the row just filled becomes "last" */
+ }
+ for (i = 0; i <= ey-y+1; i++) last[i].I = cur[i].I; /* copies the I values of the other buffer into the final row */
+ if (*row1 != last) swap((void *)row1, (void *)row2); /* make *row1 refer to the final row */
+}
+
+/* init_row2 - set row[0..ld-1] to 0 (nothing is written when ld <= 0) */
+static void
+init_row2(int *row, int ld) {
+  int *cell = row;
+  int left = ld;
+
+  while (left-- > 0) {
+    *cell++ = 0;
+  }
+}
+
+/* init_ROW - zero the C, D and I scores of row[0..ld-1] */
+static void init_ROW(st_ptr row, int ld) {
+  int k = 0;
+
+  while (k < ld) {
+    st_ptr cell = &row[k++];
+
+    cell->C = 0;
+    cell->D = 0;
+    cell->I = 0;
+  }
+}
+
+/*
+ * combine - append list x2 to list x1 and return the head of the result.
+ *
+ * When x1 is NULL, x2 is returned unchanged (st is ignored).  Otherwise,
+ * when st is non-zero, the j field of the leading nodes of x2 is
+ * incremented up to and including the first node whose type l is 3 or 4,
+ * and that node's l is decremented.  If x2 holds no such node nothing is
+ * decremented; the scan used to run off the end of the list and
+ * dereference NULL in that case.
+ */
+static match_ptr
+combine(match_ptr x1, match_ptr x2, int st) {
+  match_ptr x;
+
+  if (x1 == NULL) return x2;
+
+  /* find the tail of x1 and hook x2 onto it */
+  for (x = x1; x->next; x = x->next);
+  x->next = x2;
+
+  if (st) {
+    for (x = x2; x; x = x->next) {
+      x->j++;
+      if (x->l == 3 || x->l == 4) break;
+    }
+    /* x is NULL when no type 3/4 edge was found */
+    if (x != NULL) x->l--;
+  }
+  return x1;
+}
+
+/*
+ * global - build the alignment path for pro[x..ex] against dna[y..ey] as
+ * a list of match_node edges, using the two score-only passes global_up()
+ * and global_down() to split the problem recursively.
+ *
+ * Base cases:
+ *   ex <= x  at most one protein row: one type 5 edge per full codon
+ *            beyond y; then, if ex == x, the residue takes what is left
+ *            (type (ey-y)%3+1, or the last type 5 edge becomes type 4).
+ *            When ey <= y there is nothing left for the residue and a
+ *            single type 0 edge is returned.  That case used to
+ *            dereference an uninitialised pointer (ey-y == 0 or a negative
+ *            multiple of 3) or produce edge type -1 (ey-y == -2).
+ *   ey <= y  one type 0 edge per protein row.
+ *   small    regions below SGW1 x SGW2 are passed to small_global().
+ *
+ * N1 and N2 are handed on to the passes for the top and bottom boundary.
+ * The nodes are allocated with ckalloc(); the caller frees them.
+ */
+match_ptr
+global(int x, int y, int ex, int ey,
+       int **wgts, int gop, int gext,
+       unsigned char *dnap, unsigned char *pro, int N1, int N2,
+       struct f_struct *f_str)
+{
+  int m;
+  int m1, m2;
+  match_ptr x1, x2, mm1, mm2;
+
+  /*printf("%d %d %d %d %d %d\n", x,y, ex, ey, N1, N2);*/
+
+  if (ex <= x) {
+    mm1 = NULL; mm2 = NULL;   /* mm2 remembers the tail (first node made) */
+    for (m = y+3; m <= ey; m+=3) {
+      x1 = (match_ptr) ckalloc(sizeof(match_node));
+      x1->l = 5; x1->next = mm1;
+      if (mm1 == NULL) mm2 = x1;
+      mm1 = x1;
+    }
+    if (ex == x) {
+      if (ey <= y) {
+        /* no nucleotides remain for the residue */
+        x1 = (match_ptr) ckalloc(sizeof(match_node));
+        x1->l = 0; x1->next = NULL;
+        mm1 = x1;
+      } else if ((ey-y) % 3 != 0) {
+        x1 = (match_ptr) ckalloc(sizeof(match_node));
+        x1->l = ((ey-y) % 3) +1; x1->next = NULL;
+        if (mm1) mm2->next = x1; else mm1 = x1;
+      } else {
+        /* ey-y is a positive multiple of 3, so the loop made a node */
+        mm2->l = 4;
+      }
+    }
+    return mm1;
+  }
+  if (ey <= y) {
+    mm1 = NULL;
+    for (m = x; m <= ex; m++) {
+      x1 = (match_ptr) ckalloc(sizeof(match_node));
+      x1->l = 0; x1->next = mm1; mm1 = x1;
+    }
+    return mm1;
+  }
+
+  /* a small region is solved directly with the quadratic-space routine */
+  if (ex -x < SGW1 && ey-y < SGW2)
+    return small_global(x,y,ex,ey,wgts, gop, gext, dnap, pro, N1, N2,f_str);
+
+  m = (x+ex)/2;
+
+  /* score-only pass from row x to the middle row m; the result is left
+     in f_str->up */
+  global_up(&f_str->up, &f_str->tp, x, y, m, ey,
+            wgts, gop, gext,
+            dnap, pro, N1, f_str);
+
+  /* score-only pass from row ex back to row m+1; the result is left in
+     f_str->down */
+  global_down(&f_str->down, &f_str->tp, m+1, y, ex, ey,
+              wgts, gop, gext,
+              dnap, pro, N2, f_str);
+
+  /* find_best() picks the column (m1, m2) where the best path crosses
+     from row m to row m+1; the two halves are solved recursively and
+     joined.  A zero return means the crossing lies inside a gap, so rows
+     m and m+1 become explicit type 0 edges. */
+  if (find_best(f_str->up, f_str->down, &m1, &m2, ey-y+1, y, gop)) {
+    x1 = global(x, y, m, m1, wgts, gop, gext, dnap, pro, N1, 0, f_str);
+    x2 = global(m+1, m2, ex, ey, wgts, gop, gext, dnap, pro, 0, N2, f_str);
+    if (m1 == m2) x1 = combine(x1,x2,1);
+    else x1 = combine(x1, x2,0);
+  } else {
+    x1 = global(x, y, m-1, m1, wgts, gop, gext, dnap, pro, N1, 1, f_str);
+    x2 = global(m+2, m2, ex, ey, wgts, gop, gext, dnap, pro, 1, N2, f_str);
+    mm1 = (match_ptr) ckalloc(sizeof(match_node));
+    mm1->i = m; mm1->l = 0; mm1->j = m1;
+    mm2 = (match_ptr) ckalloc(sizeof(match_node));
+    mm2->i = m+1; mm2->l = 0; mm2->j = m1;
+    mm1->next = mm2; mm2->next = x2;
+    x1 = combine(x1, mm1, 0);
+  }
+  return x1;
+}
+
+/*
+ * find_best - choose the column where the best path crosses between the
+ * row scored by global_up() (up) and the row scored by global_down()
+ * (down).
+ *
+ * For each column i in 1..ld-1 two candidates are compared with the best
+ * so far (initially -1000): up[i].C + down[i].C, and
+ * up[i].I + down[i].I + gop.  The winning column j is reported as
+ * *m1 = j-1+y and *m2 = j+y.
+ *
+ * Returns 1 when the C candidate won and 0 when the I candidate won.
+ * When no column beats the initial value (j stays 0) the function returns
+ * 1; the result used to be an uninitialised value in that case.
+ */
+static int
+find_best(st_ptr up, st_ptr down, int *m1, int *m2, int ld, int y, int gop) {
+
+  int i, best = -1000, j = 0, s2, s4, st = 1;
+
+  for (i = 1; i < ld; i++) {
+    s2 = up[i].C + down[i].C;
+    s4 = up[i].I + down[i].I + gop;
+    if (best < s2) {
+      best = s2; j = i; st = 1;
+    }
+    if (best < s4) {
+      best = s4; j = i; st = 0;
+    }
+  }
+  *m1 = j-1+y;
+  *m2 = j+y;
+  /*printf("score=%d\n", best);*/
+  return st;
+}
+
+/*
+ An alignment is represented as a linked list whose element
+ is of type match_node. Each element represent an edge in the
+ path of the alignment graph. The fields of match_node are
+ l --- gives the type of the edge.
+ i, j --- give the end position.
+*/
+
+static match_ptr /*
+ * small_global: quadratic-space global alignment of pro[x..ex] with
+ * dna[y..ey].  The forward pass fills C[][] (scores) and st[][] (edge
+ * choices) row by row; the traceback then walks st[][] from the last cell
+ * back to the first and returns the path as a list of match_node edges
+ * allocated with ckalloc(), first edge at the head.
+ * Each st[][] value holds the edge type (0, 2, 3, 4 or 5), plus 10 when the
+ * D score was extended rather than reopened, plus 20 when the I score was.
+ * Weights come from f_str->weight1[]; wgts is not referenced here.
+ */
+small_global(int x, int y, int ex, int ey,
+ int **wgts, int gop, int gext,
+ unsigned char *dnap, unsigned char *pro,
+ int N1, int N2, struct f_struct *f_str) {
+
+ static int C[SGW1+1][SGW2+1], st[SGW1+1][SGW2+1], D[SGW2+7], I[SGW2+1]; /* static storage: contents persist between calls */
+ int i, j, e, sc, score, del, k, t, ci, cd;
+ int *cI, *cD, *cC, *lC, *cst, e2, e3, e4;
+ match_ptr mp, first;
+ struct wgt *wt, *ww;
+
+ /*printf("small_global %d %d %d %d\n", x, y, ex, ey);*/
+ sc = -gop-gext; C[0][0] = 0;
+ if (N1) I[0] = -gext; else I[0] = sc;
+
+ for (j = 1; j <= ey-y+1; j++) { /* row 0: only columns that are multiples of 3 get a gap score */
+ if (j % 3== 0) {
+ C[0][j] = sc; sc -= gext; I[j] = sc-gop;
+ } else I[j] = C[0][j] = -10000; /* -10000 marks a cell that cannot be reached */
+ st[0][j] = 5;
+ }
+ lC = &C[0][0]; cD = D; D[0] = D[1] = D[2] = -10000;
+ cI = I;
+ for (i = 1; i <= ex-x+1; i++) {
+ cC = &C[i][0];
+ wt = f_str->weight1[pro[i+x-1]]; cst = &st[i][0];
+ for (j = 0; j <=ey-y+1; j++) {
+ ci = cI[j];
+ cd= cD[j];
+ t = j+y;
+ ww = &wt[(unsigned char) dnap[t-3]]; /* NOTE(review): t-3 is negative while j+y < 3 - presumably dnap has leading padding; confirm against the caller */
+ if (j >= 4) {
+ sc = lC[j-3]+ww->iii; e2 = lC[j-2]+ww->ii;
+ e4 = lC[j-4]+ww->iv; del = 3;
+ if (e2 > sc) { sc = e2; del = 2;}
+ if (e4 >= sc) { sc = e4; del = 4;}
+ } else {
+ if (j == 3) {
+ sc = lC[0]+ww->iii; del =3;
+ } else if (j == 2) {
+ sc = lC[0]+ww->ii; del = 2;
+ } else {sc = -10000; del = 0;}
+ }
+ if (sc < ci) {
+ sc = ci; del = 0;
+ }
+ if (sc <= cd) {
+ sc = cd;
+ del = 5;
+ }
+ cC[j] = sc;
+ sc -= gop;
+ if (sc <= cd) {
+ del += 10; /* +10: the D score is an extension of the existing gap */
+ cD[j+3] = cd - gext;
+ } else cD[j+3] = sc -gext;
+ if (sc < ci) {
+ del += 20; /* +20: the I score is an extension of the existing gap */
+ cI[j] = ci-gext;
+ } else cI[j] = sc-gext;
+ *(cst++) = del;
+ }
+ lC = cC;
+ }
+ /*printf("small global score =%d\n", C[ex-x+1][ey-y+1]);*/
+ if (N2 && cC[ey-y+1] < ci+gop) st[ex-x+1][ey-y+1] =0; /* ci still holds the I value read for the last cell of the last row */
+ first = NULL; e = 1;
+ for (i = ex+1, j = ey+1; i > x || j > y; i--) { /* traceback; the i-- here is cancelled by i++ below for type 5 edges */
+ mp = (match_ptr) ckalloc(sizeof(match_node));
+ mp->i = i-1;
+ k = (t=st[i-x][j-y])%10;
+ mp->j = j-1;
+ if (e == 5 && (t/10)%2 == 1) k = 5; /* previous edge was type 5 and the +10 flag is set: stay in the gap */
+ if (e == 0 && (t/20)== 1) k = 0; /* previous edge was type 0 and the +20 flag is set: stay in the gap */
+ if (k == 5) { j -= 3; i++; e=5;}
+ else {j -= k;if (k==0) e= 0; else e = 1;}
+ mp->l = k;
+ mp->next = first; /* edges are prepended, so the list ends up in forward order */
+ first = mp;
+ }
+
+ /* for (i = 0; i <= ex-x; i++) {
+ for (j = 0; j <= ey-y; j++)
+ printf("%d ", C[i][j]);
+ printf("\n");
+ }
+ */
+ return first;
+}
+
+#define XTERNAL
+#include "upam.h"
+
+/*
+ * display_alig - print an alignment to stdout, WIDTH columns per block.
+ *
+ *   a       a[0] and a[1] are the start positions in pro and dna; a[2..]
+ *           are the edge codes (0, 2, 3, 4, 5)
+ *   dna     precomputed codon numbers, used as index into f_str->weight_c
+ *   pro     protein residue numbers
+ *   length  number of entries in a[]
+ *   ld      not used
+ *
+ * Three text rows are built in parallel (line1, line3, line2 in print
+ * order).  A frame-shift edge adds two columns, so a row may hold
+ * WIDTH+1 characters when a block is flushed; the extra column is carried
+ * over to the next block.  The carried column of line3 used to be lost
+ * (its saved character was never restored), and the carry-over is now done
+ * with memmove() because source and destination overlap.
+ */
+void
+display_alig(int *a, unsigned char *dna, unsigned char *pro,
+             int length, int ld, struct f_struct *f_str)
+{
+  int len = 0, i, j, x, y, k, iaa;
+  static char line1[100], line2[100], line3[100];
+  char c1, c2, c3;
+
+  line1[0] = line2[0] = line3[0] = '\0'; x= a[0]; y = a[1]-3;
+
+  printf("\n%5d\n%5d", y+3, x);
+  for (len = 0, j = 2; j < length; j++) {
+    i = a[j];
+    line3[len] = ' ';
+    switch (i) {
+    case 3:
+      y += 3;
+      line2[len] = aa[iaa=pro[x++]];
+      line1[len] = f_str->weight_c[iaa][(unsigned char) dna[y]].c5;
+      if (line1[len] != f_str->weight_c[iaa][(unsigned char) dna[y]].c3)
+        line3[len] = f_str->weight_c[iaa][(unsigned char) dna[y]].c3;
+      break;
+    case 2:
+      y += 2;
+      line1[len] = '\\';
+      line2[len++] = ' ';
+      line2[len] = aa[iaa=pro[x++]];
+      line1[len] = f_str->weight_c[iaa][(unsigned char) dna[y]].c2;
+      line3[len] = f_str->weight_c[iaa][(unsigned char) dna[y]].c3;
+      break;
+    case 4:
+      y += 4;
+      line1[len] = '/';
+      line2[len++] = ' ';
+      line2[len] = aa[iaa=pro[x++]];
+      line1[len] = f_str->weight_c[iaa][(unsigned char) dna[y]].c4;
+      line3[len] = f_str->weight_c[iaa][(unsigned char) dna[y]].c3;
+      break;
+    case 5:
+      y += 3;
+      line1[len] = f_str->weight_c[0][(unsigned char) dna[y]].c3;
+      line2[len] = '-';
+      break;
+    case 0:
+      line1[len] = '-';
+      line2[len] = aa[pro[x++]];
+      break;
+    }
+    len++;
+    line1[len] = line2[len] = line3[len] = '\0';
+    if (len >= WIDTH) {
+      for (k = 10; k <= WIDTH; k+=10)
+        printf(" . :");
+      if (k-5 < WIDTH) printf(" .");
+
+      /* cut the rows at WIDTH for printing, then put the characters back */
+      c1 = line1[WIDTH]; c2 = line2[WIDTH]; c3 = line3[WIDTH];
+      line1[WIDTH] = line2[WIDTH] = line3[WIDTH] = '\0';
+      printf("\n %s\n %s\n %s\n", line1, line3, line2);
+      line1[WIDTH] = c1; line2[WIDTH] = c2; line3[WIDTH] = c3;
+
+      /* carry columns WIDTH..len (terminator included) to the front */
+      memmove(line1, &line1[WIDTH], (size_t)(len - WIDTH + 1));
+      memmove(line2, &line2[WIDTH], (size_t)(len - WIDTH + 1));
+      memmove(line3, &line3[WIDTH], (size_t)(len - WIDTH + 1));
+      len = len - WIDTH;
+      printf("\n%5d\n%5d", y+3, x);
+    }
+  }
+  for (k = 10; k < len; k+=10)
+    printf(" . :");
+  if (k-5 < len) printf(" .");
+  printf("\n %s\n %s\n %s\n", line1, line3, line2);
+}
+
+
+/* alignment store the operation that align the protein and dna sequence.
+ The code of the number in the array is as follows:
+ 0: delete of an amino acid.
+ 2: frame shift, 2 nucleotides match with an amino acid
+ 3: match an amino acid with a codon
+ 4: the other type of frame shift
+ 5: delete of a codon
+
+
+ Also the first two element of the array stores the starting point
+ in the protein and dna sequences in the local alignment.
+
+ Display looks like where WIDTH is assumed to be divisible by 10.
+
+ 0 . : . : . : . : . : . :
+ AACE/N\PLK\G\HK\Y/LWA\S\C\E/P\PRIRZ/G\HK\Y/LWA\S\C\E/P\PRIRZ
+ I S G S V F N R Q L A G S V F N R Q L A
+ AACE P P-- G HK Y TWA A C E P P---- G HK Y TWA A C E P P----
+
+ 60 . : . : . : . : . : . :
+ /G\HK\Y/LWA\S\C\E/P\PRIRZ/G\HK\Y/LWA\S\C\E/P\PRIRZ/G\HK\Y/LW
+ G S V F N R Q L A G S V F N R Q L A G S V F
+ G HK Y TWA A C E P P---- G HK Y TWA A C E P P---- G HK Y TW
+
+For a frame shift, the middle row shows the letter in the original sequence,
+and the letter in the top row is the amino acid that is chosen by the
+alignment (translated codon chosen from 4 nucleotides, or 2+1).
+*/
+
+/* fatal - write msg and a newline to stderr, then exit with status 1 */
+void
+fatal(char *msg)
+{
+  fprintf(stderr, "%s\n", msg);
+  exit(1);
+}
+
+/*
+ * do_walign - produce the alignment for one query/library pair.
+ *
+ * a_res->res is pointed at f_str->res and pro_dna() fills it (at most
+ * f_str->max_res codes).  Without TFAST, aa1 is aligned with f_str->aa0v
+ * (length n0-2).  With TFAST, f_str->aa1v is first rebuilt from aa1
+ * (pre_com() for frame 0, pre_com_r() otherwise), three translations of
+ * aa1 are written to f_str->aa1x, and aa0 is aligned with f_str->aa1v
+ * (length n1-2).  The gap and frame-shift values of ppst are negated
+ * before being passed on; with OLD_FASTA_GAP the gap-open value is
+ * -(gdelval - ggapval).
+ *
+ * *have_ares is set to 1.  Returns the pro_dna() score.
+ */
+int do_walign (const unsigned char *aa0, int n0,
+               const unsigned char *aa1, int n1,
+               int frame,
+               struct pstruct *ppst,
+               struct f_struct *f_str,
+               struct a_res_str *a_res,
+               int *have_ares)
+{
+  int score, gap_open;
+#ifdef TFAST
+  int itx, n10, last_n1;
+  unsigned char *aa1x;
+#endif
+
+  a_res->res = f_str->res;
+
+#ifdef OLD_FASTA_GAP
+  gap_open = -(ppst->gdelval - ppst->ggapval);
+#else
+  gap_open = -ppst->gdelval;
+#endif
+
+#ifndef TFAST
+  score = pro_dna(aa1, n1, f_str->aa0v, n0-2, ppst->pam2[0],
+                  gap_open, -ppst->ggapval, -ppst->gshift,
+                  f_str, f_str->max_res, a_res);
+#else
+  /* make a precomputed codon number series */
+  if (frame != 0) {
+    /* must do things backwards */
+    pre_com_r(aa1, n1, f_str->aa1v);
+  }
+  else {
+    pre_com(aa1, n1, f_str->aa1v);
+  }
+
+  /* make translated sequence: three translations stored one after the
+     other in aa1x, each followed by one spare position */
+  aa1x = f_str->aa1x;
+  last_n1 = 0;
+  for (itx = frame*3; itx < frame*3+3; itx++) {
+    n10 = saatran(aa1, &aa1x[last_n1], n1, itx);
+    last_n1 += n10+1;
+  }
+  n10 = last_n1-1;
+
+  score = pro_dna(aa0, n0, f_str->aa1v, n1-2, ppst->pam2[0],
+                  gap_open, -ppst->ggapval, -ppst->gshift,
+                  f_str, f_str->max_res, a_res);
+#endif
+
+  a_res->res = f_str->res;
+  *have_ares = 1;
+
+  return score;
+}
+
+/* pre_cons - with TFAST, rebuild the codon number series f_str->aa1v from
+   aa1 (pre_com() for frame 0, pre_com_r() for any other frame); without
+   TFAST this does nothing */
+void
+pre_cons(const unsigned char *aa1, int n1, int frame, struct f_struct *f_str) {
+
+#ifdef TFAST
+  if (frame != 0) {
+    /* must do things backwards */
+    pre_com_r(aa1, n1, f_str->aa1v);
+  }
+  else {
+    pre_com(aa1, n1, f_str->aa1v);
+  }
+#endif
+}
+
+/* aln_func_vals - set up aln.qlfact, qlrev, llfact, llmult, frame, llrev */
+/* call from calcons, calc_id, calc_code */
+/* Without TFAST the query gets factor 3 and may be reversed (frame > 0);
+   with TFAST the same applies to the library side instead. */
+void
+aln_func_vals(int frame, struct a_struct *aln) {
+  int reversed = (frame > 0) ? 1 : 0;
+
+  aln->frame = 0;
+  aln->llmult = 1;
+
+#ifndef TFAST
+  aln->qlfact = 3;
+  aln->qlrev = reversed;
+  aln->llfact = 1;
+  aln->llrev = 0;
+#else /* TFASTX */
+  aln->qlfact = 1;
+  aln->qlrev = 0;
+  aln->llfact = 3;
+  aln->llrev = reversed;
+#endif /* TFASTX */
+}
+
+#include "structs.h"
+#include "a_mark.h"
+
+/*
+ * calcons - turn the edge codes in a_res.res[0..nres-1] into three
+ * parallel text rows and fill in the alignment statistics in *aln.
+ *
+ *   seqc0/seqc1  the two aligned sequences; without TFAST seqc0 is the
+ *                codon (DNA) row and seqc1 the protein row, with TFAST
+ *                the other way round
+ *   seqca        one match symbol per column (M_IDENT, M_POS, M_ZERO,
+ *                M_NEG, M_DEL); this row is '\0'-terminated
+ *   *nc          set to the number of columns not counting the frame-shift
+ *                marker columns
+ *
+ * Edge codes: 3 = residue against a codon; 2 and 4 = frame shift (a '/'
+ * or '\\' column) followed by a residue; 5 = gap in the protein row;
+ * 0 = gap in the codon row.
+ *
+ * aln->nident counts identical residues and aln->nsim counts residues with
+ * a score >= 0.  nsim is counted for residues that follow a frame shift
+ * as well, as calcons_a() does; it used to be counted for code 3 only.
+ * Characters are converted to unsigned char before they are used as
+ * pascii[] index or passed to toupper().
+ *
+ * Returns the total number of columns written (frame-shift markers
+ * included).
+ */
+int calcons(const unsigned char *aa0, int n0,
+            const unsigned char *aa1, int n1,
+            int *nc,
+            struct a_struct *aln,
+            struct a_res_str a_res,
+            struct pstruct pst,
+            char *seqc0, char *seqc1, char *seqca,
+            struct f_struct *f_str)
+{
+  int i0, i1;
+  int lenc, not_c, itmp, ngap_p, ngap_d, nfs;
+  char *sp0, *sp1, *spa, *sq;
+  unsigned char aap;
+  const unsigned char *ap0, *ap1;
+  int *rp, *rpmax;
+
+  /* don't fill in the ends */
+
+  rp = a_res.res;                   /* start of alignment info */
+  rpmax = &a_res.res[a_res.nres];   /* end of alignment info */
+
+  if (pst.ext_sq_set) {sq = pst.sqx;}
+  else {sq = pst.sq;}
+
+#ifndef TFAST   /* FASTX */
+  ap0 = f_str->aa0v;          /* computed codons -> ap0*/
+  ap1 = aa1;                  /* protein sequence -> ap1 */
+  aln->smin1 = a_res.min0;    /* start in protein sequence */
+  aln->smin0 = a_res.min1;    /* start in DNA/codon sequence */
+#else           /* TFASTYZ */
+  ap0 = f_str->aa1v;          /* computed codons -> ap0*/
+  ap1 = aa0;                  /* protein sequence */
+  aln->smin0 = a_res.min0;    /* start in protein sequence */
+  aln->smin1 = a_res.min1;    /* start in codon sequence */
+#endif
+
+  /* now get the middle */
+  spa = seqca;
+#ifndef TFAST
+  sp0 = seqc0;   /* sp0/seqc0 is codon sequence */
+  sp1 = seqc1;   /* sp1/seqc1 is protein sequence */
+#else
+  sp1 = seqc0;   /* sp1/seqc0 is protein sequence */
+  sp0 = seqc1;   /* sp0/seqc1 is codon sequence */
+#endif
+
+  lenc = not_c = aln->nident = aln->nsim = ngap_d = ngap_p = nfs = 0;
+  i0 = a_res.min1-3;   /* start of codon sequence */
+  i1 = a_res.min0;     /* start of protein sequence */
+
+  while (rp < rpmax ) {
+    switch (*rp++) {
+    case 3:   /* match */
+      i0 += 3;
+      *sp1 = sq[aap=ap1[i1++]];
+      *sp0 = f_str->weight_c[aap][ap0[i0]].c5;
+
+      if ((itmp=pst.pam2[0][aap][pascii[(unsigned char)*sp0]])<0) { *spa = M_NEG; }
+      else if (itmp == 0) { *spa = M_ZERO;}
+      else {*spa = M_POS;}
+      if (*spa == M_ZERO || *spa == M_POS) { aln->nsim++;}
+
+      if (toupper((unsigned char)*sp0) == toupper((unsigned char)*sp1)) {aln->nident++; *spa = M_IDENT;}
+      sp0++; sp1++; spa++;
+      lenc++;
+      break;
+    case 2:   /* frame shift +2, then match */
+      nfs++;
+      i0 += 2;
+      *sp0++ = '/';
+      *sp1++ = '-';
+      *spa++ = M_DEL;
+      not_c++;
+      *sp1 = sq[aap=ap1[i1++]];
+      *sp0 = f_str->weight_c[aap][ap0[i0]].c2;
+      if ((itmp=pst.pam2[0][aap][pascii[(unsigned char)*sp0]])<0) { *spa = M_NEG; }
+      else if (itmp == 0) { *spa = M_ZERO;}
+      else {*spa = M_POS;}
+      if (*spa == M_ZERO || *spa == M_POS) { aln->nsim++;}
+
+      if (toupper((unsigned char)*sp0) == toupper((unsigned char)*sp1)) {aln->nident++; *spa = M_IDENT;}
+      sp0++; sp1++; spa++;
+      lenc++;
+      break;
+    case 4:   /* frame shift, -1, then match */
+      nfs++;
+      i0 += 4;
+      *sp0++ = '\\';
+      *sp1++ = '-';
+      *spa++ = M_DEL;
+      not_c++;
+      *sp1 = sq[aap=ap1[i1++]];
+      *sp0 = f_str->weight_c[aap][ap0[i0]].c4;
+      if ((itmp=pst.pam2[0][aap][pascii[(unsigned char)*sp0]])<0) { *spa = M_NEG; }
+      else if (itmp == 0) { *spa = M_ZERO;}
+      else {*spa = M_POS;}
+      if (*spa == M_ZERO || *spa == M_POS) { aln->nsim++;}
+
+      if (toupper((unsigned char)*sp0) == toupper((unsigned char)*sp1)) {aln->nident++; *spa = M_IDENT;}
+      sp0++; sp1++; spa++;
+      lenc++;
+      break;
+    case 5:   /* insertion in 1 */
+      i0 += 3;
+      *sp0++ = f_str->weight_c[0][ap0[i0]].c3;
+      *sp1++ = '-';
+      *spa++ = M_DEL;
+      lenc++;
+      ngap_p++;
+      break;
+    case 0:   /* insertion in 0 */
+      *sp0++ = '-';
+      *sp1++ = sq[ap1[i1++]];
+      *spa++ = M_DEL;
+      lenc++;
+      ngap_d++;
+      break;
+    }
+  }
+
+  *spa = '\0';
+
+#ifndef TFAST
+  aln->amax0 = i0+3;   /* end of codon sequence */
+  aln->amax1 = i1;     /* end of protein sequence */
+  aln->ngap_q = ngap_d;
+  aln->ngap_l = ngap_p;
+#else
+  aln->amax1 = i0+3;   /* end of codon sequence */
+  aln->amax0 = i1;     /* end of protein sequence */
+  aln->ngap_q = ngap_p;
+  aln->ngap_l = ngap_d;
+#endif
+  aln->nfs = nfs;
+  aln->amin0 = aln->smin0;
+  aln->amin1 = aln->smin1;
+
+  if (lenc < 0) lenc = 1;
+
+  *nc = lenc;
+  /* now we have the middle, get the right end */
+
+  return lenc+not_c;
+}
+
+/* calcons_a - expand an encoded alignment into display rows plus an
+   annotation row.
+
+   a_res.res[0 .. a_res.nres-1] holds one op per aligned position:
+     3  codon aligned with a residue
+     2  frame shift (codon index advances by 2), then an aligned residue
+     4  frame shift (codon index advances by 4), then an aligned residue
+     5  codon with no residue ('-' in the protein row)
+     0  residue with no codon ('-' in the codon row)
+   Any other value is ignored.  a_res.min1 / a_res.min0 are the starting
+   offsets in the codon / protein sequence.
+
+   Rows written, one character per alignment column:
+     codon row    seqc0 without TFAST, seqc1 with TFAST
+     protein row  seqc1 without TFAST, seqc0 with TFAST
+     seqca        M_IDENT / M_POS / M_ZERO / M_NEG / M_DEL per column
+     seqc0a       annotation row: ann_arr[aa0a[i]] for the protein residue
+                  in that column with TFAST, blank otherwise
+   Only seqc0a and seqca are NUL-terminated here.
+
+   A frame shift yields two columns (the '/' or '\\' marker, then the
+   residue), and every row, including seqc0a, receives exactly one
+   character per column so that the rows stay aligned.
+
+   Also sets aln->smin0/1, amin0/1, amax0/1, nident, nsim, ngap_q, ngap_l
+   and nfs.  *nc receives the number of columns excluding frame-shift
+   marker columns; the return value includes them.  n0 and n1 are unused;
+   aa0a and ann_arr are only read with TFAST.
+*/
+int calcons_a(const unsigned char *aa0, unsigned char *aa0a, int n0,
+              const unsigned char *aa1, int n1,
+              int *nc,
+              struct a_struct *aln,
+              struct a_res_str a_res,
+              struct pstruct pst,
+              char *seqc0, char *seqc0a, char *seqc1, char *seqca,
+              char *ann_arr, struct f_struct *f_str)
+{
+  int i0, i1;
+  int lenc, not_c, itmp, ngap_p, ngap_d, nfs;
+  char *sp0, *sp0a, *sp1, *spa, *sq;
+  unsigned char aap;
+  const unsigned char *ap0, *ap1;
+  int *rp, *rpmax;
+
+  /* the ends outside the aligned region are not filled in */
+
+  rpmax = &a_res.res[a_res.nres];	/* end of alignment info */
+
+  if (pst.ext_sq_set) {sq = pst.sqx;}
+  else {sq = pst.sq;}
+
+#ifndef TFAST
+  ap0 = f_str->aa0v;		/* computed codons -> ap0*/
+  ap1 = aa1;			/* protein sequence -> ap1 */
+  aln->smin1 = a_res.min0;	/* start in protein sequence */
+  aln->smin0 = a_res.min1;	/* start in DNA/codon sequence */
+#else	/* TFASTYZ */
+  ap0 = f_str->aa1v;		/* computed codons -> ap0*/
+  ap1 = aa0;			/* protein sequence */
+  aln->smin0 = a_res.min0;	/* start in protein sequence */
+  aln->smin1 = a_res.min1;	/* start in codon sequence */
+#endif
+
+  rp = a_res.res;		/* start of alignment info */
+
+  spa = seqca;
+  sp0a = seqc0a;
+#ifndef TFAST
+  sp0 = seqc0;			/* sp0/seqc0 is codon sequence */
+  sp1 = seqc1;			/* sp1/seqc1 is protein sequence */
+#else
+  sp1 = seqc0;			/* sp1/seqc0 is protein sequence */
+  sp0 = seqc1;			/* sp0/seqc1 is codon sequence */
+#endif
+
+  lenc = not_c = aln->nident = aln->nsim = ngap_d = ngap_p = nfs = 0;
+  i0 = a_res.min1-3;		/* each op advances i0 before it is used */
+  i1 = a_res.min0;		/* start of protein sequence */
+
+  while (rp < rpmax ) {
+    switch (*rp++) {
+    case 3:	/* match */
+      i0 += 3;
+      /* annotation must be read before i1 is advanced below */
+#ifndef TFAST
+      *sp0a++ = ' ';
+#else
+      *sp0a++ = ann_arr[aa0a[i1]];
+#endif
+      *sp1 = sq[aap=ap1[i1++]];
+      *sp0 = f_str->weight_c[aap][ap0[i0]].c5;
+
+      /* cast: a plain char may be negative, which is not a valid index
+	 or <ctype.h> argument */
+      if ((itmp=pst.pam2[0][aap][pascii[(unsigned char)*sp0]])<0) { *spa = M_NEG; }
+      else if (itmp == 0) { *spa = M_ZERO;}
+      else {*spa = M_POS;}
+      if (*spa == M_ZERO || *spa == M_POS) { aln->nsim++;}
+
+      if (toupper((unsigned char)*sp0) == toupper((unsigned char)*sp1)) {
+	aln->nident++; *spa = M_IDENT;
+      }
+      sp0++; sp1++; spa++;
+      lenc++;
+      break;
+    case 2:	/* frame shift (codon index +2), then match */
+      nfs++;
+      i0 += 2;
+      /* marker column */
+      *sp0a++ = ' ';
+      *sp0++ = '/';
+      *sp1++ = '-';
+      *spa++ = M_DEL;
+      not_c++;
+
+      /* residue column */
+#ifndef TFAST
+      *sp0a++ = ' ';
+#else
+      *sp0a++ = ann_arr[aa0a[i1]];
+#endif
+      *sp1 = sq[aap=ap1[i1++]];
+      *sp0 = f_str->weight_c[aap][ap0[i0]].c2;
+      if ((itmp=pst.pam2[0][aap][pascii[(unsigned char)*sp0]])<0) { *spa = M_NEG; }
+      else if (itmp == 0) { *spa = M_ZERO;}
+      else {*spa = M_POS;}
+      if (*spa == M_ZERO || *spa == M_POS) { aln->nsim++;}
+
+      if (toupper((unsigned char)*sp0) == toupper((unsigned char)*sp1)) {
+	aln->nident++; *spa = M_IDENT;
+      }
+      sp0++; sp1++; spa++;
+      lenc++;
+      break;
+    case 4:	/* frame shift (codon index +4), then match */
+      nfs++;
+      i0 += 4;
+      /* marker column: blank annotation keeps seqc0a in step with the
+	 other rows, exactly as in case 2 */
+      *sp0a++ = ' ';
+      *sp0++ = '\\';
+      *sp1++ = '-';
+      *spa++ = M_DEL;
+      not_c++;
+
+      /* residue column */
+#ifndef TFAST
+      *sp0a++ = ' ';
+#else
+      *sp0a++ = ann_arr[aa0a[i1]];
+#endif
+      *sp1 = sq[aap=ap1[i1++]];
+      *sp0 = f_str->weight_c[aap][ap0[i0]].c4;
+      if ((itmp=pst.pam2[0][aap][pascii[(unsigned char)*sp0]])<0) { *spa = M_NEG; }
+      else if (itmp == 0) { *spa = M_ZERO;}
+      else {*spa = M_POS;}
+      if (*spa == M_ZERO || *spa == M_POS) { aln->nsim++;}
+
+      if (toupper((unsigned char)*sp0) == toupper((unsigned char)*sp1)) {
+	aln->nident++; *spa = M_IDENT;
+      }
+      sp0++; sp1++; spa++;
+      lenc++;
+      break;
+    case 5:	/* codon with no residue: gap in the protein row */
+      i0 += 3;
+      *sp0++ = f_str->weight_c[0][ap0[i0]].c3;
+      *sp1++ = '-';
+      *spa++ = M_DEL;
+      *sp0a++ = ' ';
+      lenc++;
+      ngap_p++;
+      break;
+    case 0:	/* residue with no codon: gap in the codon row */
+      *sp0++ = '-';
+#ifndef TFAST
+      *sp0a++ = ' ';
+#else
+      *sp0a++ = ann_arr[aa0a[i1]];
+#endif
+      *sp1++ = sq[ap1[i1++]];
+      *spa++ = M_DEL;
+      lenc++;
+      ngap_d++;
+      break;
+    default:	/* unknown op: contributes no column */
+      break;
+    }
+  }
+
+  *sp0a = *spa = '\0';
+
+#ifndef TFAST
+  aln->amax0 = i0+3;	/* end of codon sequence */
+  aln->amax1 = i1;	/* end of protein sequence */
+  aln->ngap_q = ngap_d;
+  aln->ngap_l = ngap_p;
+#else
+  aln->amax1 = i0+3;	/* end of codon sequence */
+  aln->amax0 = i1;	/* end of protein sequence */
+  aln->ngap_q = ngap_p;
+  aln->ngap_l = ngap_d;
+#endif
+  aln->nfs = nfs;
+  aln->amin0 = aln->smin0;
+  aln->amin1 = aln->smin1;
+
+  if (lenc < 0) lenc = 1;
+
+  *nc = lenc;
+
+  /* total columns written, including frame-shift marker columns */
+  return lenc+not_c;
+}
+
+/* update_code - append one "<op character><count>" token to al_str.
+
+   al_str      NUL-terminated string being built; modified in place.
+   al_str_max  space still available in al_str, counted from its current
+               terminating NUL.  calc_code() passes
+               al_str_n - strlen(al_str); this assumes al_str_n is the
+               size of the buffer.
+   op          index into op_char[] selecting the operation character.
+   op_cnt      run length, printed in decimal after the character.
+   op_char     table of operation characters.
+
+   strncat(dst, src, n) appends up to n characters and then writes a
+   terminating NUL, so only al_str_max-1 characters may be appended
+   without writing past the available space.  The token is silently
+   truncated when it does not fit, and nothing is appended when no room
+   is left.
+*/
+void
+update_code(char *al_str, int al_str_max, int op, int op_cnt, char *op_char) {
+
+  char tmp_cnt[20];	/* one op character + decimal int + NUL */
+
+  /* also rejects a non-positive remainder, which would otherwise be
+     converted to a huge size_t by strncat() */
+  if (al_str_max <= 1) return;
+
+  sprintf(tmp_cnt,"%c%d",op_char[op],op_cnt);
+  strncat(al_str,tmp_cnt,(size_t)(al_str_max-1));
+}
+
+/* calc_code - build a run-length encoded alignment string in al_str.
+
+   Walks a_res.res[0 .. a_res.nres-1] and appends "<op char><count>"
+   tokens to al_str through update_code().  al_str must already hold a
+   NUL-terminated string; al_str_n - strlen(al_str) is passed to
+   update_code() as the remaining space.
+
+   Op codes index op_char[]:
+     0  residue with no codon        5  codon with no residue
+     2  frame shift, codon index +2  4  frame shift, codon index +4
+     3  run of matches               6  match where either character is '*'
+   (6 is generated here; it is not read from a_res.res[].)  Without TFAST
+   op_char is "- /=\\+*"; with TFAST the '+' and '-' entries are swapped.
+
+   A token is emitted when the current op changes, and once more after
+   the loop.  Because the state starts as op 3 with a count of 0, the
+   first token can carry a zero count when the alignment does not begin
+   with an ordinary match.
+
+   Also sets aln->smin0/1, amin0/1, amax0/1, nident, nsim, ngap_q, ngap_l
+   and nfs.  Identity is an exact character comparison.  Returns the
+   number of ops processed.  n0 and n1 are unused; itmp is unused and
+   not_c is counted but never read.
+*/
+int calc_code(const unsigned char *aa0, int n0,
+ const unsigned char *aa1, int n1,
+ struct a_struct *aln,
+ struct a_res_str a_res,
+ struct pstruct pst,
+ char *al_str, int al_str_n, struct f_struct *f_str)
+{
+ int i0, i1;
+ int lenc, not_c, itmp, ngap_d, ngap_p, nfs;
+ int op, op_cnt;
+ char sp0, sp1, op_char[10];
+ unsigned char aap;
+ const unsigned char *ap0, *ap1;
+ int *rp, *rpmax;
+
+ /* don't fill in the ends */
+
+#ifndef TFAST
+ strncpy(op_char,"- /=\\+*",sizeof(op_char));
+ ap0 = f_str->aa0v; /* computed codons -> ap0*/
+ ap1 = aa1; /* protein sequence -> ap1 */
+ aln->smin1 = a_res.min0; /* start in protein sequence */
+ aln->smin0= a_res.min1; /* start in DNA/codon sequence */
+#else /* TFASTYZ */
+ strncpy(op_char,"+ /=\\-*",sizeof(op_char));
+ ap0 = f_str->aa1v; /* computed codons -> ap0*/
+ ap1 = aa0; /* protein sequence */
+ aln->smin0 = a_res.min0; /* start in protein sequence */
+ aln->smin1 = a_res.min1; /* start in codon sequence */
+#endif
+
+ rp = a_res.res; /* start of alignment info */
+ rpmax = &a_res.res[a_res.nres]; /* end of alignment info */
+
+/* now get the middle */
+
+ lenc = not_c = aln->nident = aln->nsim = ngap_d = ngap_p = nfs = 0;
+ /* current run: op 3 (match) with nothing counted yet */
+ op_cnt = 0;
+ op = 3;
+
+ i0 = a_res.min1-3; /* each op advances i0 before it is used */
+ i1 = a_res.min0; /* start of protein sequence */
+
+ while (rp < rpmax ) {
+ switch (*rp++) {
+ case 3: /* match */
+ sp1 = pst.sq[aap=ap1[i1++]];
+ i0 += 3;
+ sp0 = f_str->weight_c[aap][ap0[i0]].c5;
+ if (pst.pam2[0][aap][pascii[sp0]]>=0) { aln->nsim++; }
+
+ if (op == 3 || op == 6) {
+ if (sp0 != '*' && sp1 != '*') {
+ /* ordinary match: close a '*' run, or extend the match run */
+ if (op == 6 ) {
+ update_code(al_str, al_str_n-strlen(al_str),op, op_cnt,op_char);
+ op_cnt = 1; op = 3;
+ }
+ else {op_cnt++;}
+ }
+ else {
+ /* '*' on either side: flush the current run and start an op 6 run
+ of length 1 (consecutive '*' columns are not merged) */
+ update_code(al_str, al_str_n-strlen(al_str),op, op_cnt,op_char);
+ op_cnt = 1; op = 6;
+ }
+ }
+ else {
+ /* previous run was a gap: flush it and start a match run */
+ update_code(al_str, al_str_n-strlen(al_str),op, op_cnt, op_char);
+ op_cnt = 1; op = 3;
+ }
+ if (sp0 == sp1) aln->nident++;
+ lenc++;
+ break;
+ case 2: /* frame shift: codon index advances by 2, then a match */
+ /* flush the current run, emit the shift with count 1, then start a
+ match run that already counts this residue */
+ update_code(al_str, al_str_n-strlen(al_str),op, op_cnt, op_char);
+ op = 2; op_cnt = 1;
+ update_code(al_str, al_str_n-strlen(al_str),op, op_cnt, op_char);
+ op = 3; op_cnt = 1;
+
+ nfs++;
+ i0 += 2;
+ not_c++;
+ sp1 = pst.sq[aap=ap1[i1++]];
+ sp0 = f_str->weight_c[aap][ap0[i0]].c2;
+ if (pst.pam2[0][aap][pascii[sp0]]>=0) { aln->nsim++; }
+ if (sp0 == sp1) aln->nident++;
+ lenc++;
+ break;
+ case 4: /* frame shift: codon index advances by 4, then a match */
+ /* same token sequence as case 2, using op_char[4] */
+ update_code(al_str, al_str_n-strlen(al_str),op, op_cnt, op_char);
+ op = 4; op_cnt = 1;
+ update_code(al_str, al_str_n-strlen(al_str),op, op_cnt, op_char);
+ op = 3; op_cnt = 1;
+
+ nfs++;
+ i0 += 4;
+ not_c++;
+ sp1 = pst.sq[aap=ap1[i1++]];
+ sp0 = f_str->weight_c[aap][ap0[i0]].c4;
+ if (pst.pam2[0][aap][pascii[sp0]]>=0) { aln->nsim++; }
+ if (sp0 == sp1) aln->nident++;
+ lenc++;
+ break;
+ case 5: /* codon with no residue: only the codon index advances */
+ if (op == 5) op_cnt++;
+ else {
+ update_code(al_str, al_str_n-strlen(al_str),op, op_cnt, op_char);
+ op = 5; op_cnt = 1;
+ }
+
+ i0 += 3;
+ lenc++;
+ ngap_p++;
+ break;
+ case 0: /* residue with no codon: only the protein index advances */
+ if (op == 0) op_cnt++;
+ else {
+ update_code(al_str, al_str_n-strlen(al_str),op, op_cnt, op_char);
+ op = 0; op_cnt = 1;
+ }
+
+ i1++;
+ lenc++;
+ ngap_d++;
+ break;
+ }
+ }
+
+ /* flush the run that was still open when the ops ran out */
+ update_code(al_str, al_str_n-strlen(al_str),op, op_cnt, op_char);
+
+#ifndef TFAST
+ aln->amax0 = i0+3; /* end of codon sequence */
+ aln->amax1 = i1; /* end of protein sequence */
+ aln->ngap_q = ngap_d;
+ aln->ngap_l = ngap_p;
+#else
+ aln->amax1 = i0+3; /* end of codon sequence */
+ aln->amax0 = i1; /* end of protein sequence */
+ aln->ngap_q = ngap_p;
+ aln->ngap_l = ngap_d;
+#endif
+ aln->nfs = nfs;
+ aln->amin0 = aln->smin0;
+ aln->amin1 = aln->smin1;
+
+ if (lenc < 0) lenc = 1;
+
+/* only the aligned region is encoded; the ends are not filled in */
+
+ return lenc;
+}
+
+/* calc_id - tally identities, similarities, gaps and frame shifts for an
+   encoded alignment without producing any display text.
+
+   a_res.res[0 .. a_res.nres-1] is read one op at a time:
+     2, 3, 4  a residue is paired with a codon; the codon index advances
+              by the op value, and 2 and 4 also count as a frame shift
+     5        codon with no residue (gap on the protein side)
+     0        residue with no codon (gap on the codon side)
+   Any other value is skipped and is not counted.
+
+   Results are stored in aln: smin0/1, amin0/1, amax0/1, nident, nsim,
+   ngap_q, ngap_l and nfs.  A pair is similar when its pam2 score is
+   >= 0 and identical when the two characters compare equal.  Returns
+   the number of ops counted.  n0 and n1 are unused.
+*/
+int calc_id(const unsigned char *aa0, int n0,
+            const unsigned char *aa1, int n1,
+            struct a_struct *aln,
+            struct a_res_str a_res,
+            struct pstruct pst,
+            struct f_struct *f_str)
+{
+  const unsigned char *codons, *prot;	/* codon data, protein residues */
+  int cod_i, prot_i;			/* current index into each */
+  int n_ops, n_shift, gaps_codon, gaps_prot;
+  int *op_p, *op_end;
+  int step;
+  unsigned char res;
+  char cod_ch, prot_ch;
+
+#ifndef TFAST	/* FASTYZ */
+  codons = f_str->aa0v;
+  prot = aa1;
+  aln->smin1 = a_res.min0;	/* start in protein sequence */
+  aln->smin0 = a_res.min1;	/* start in DNA/codon sequence */
+#else		/* TFASTYZ */
+  codons = f_str->aa1v;
+  prot = aa0;
+  aln->smin0 = a_res.min0;	/* start in protein sequence */
+  aln->smin1 = a_res.min1;	/* start in codon sequence */
+#endif
+
+  aln->nsim = 0;
+  aln->nident = 0;
+  n_ops = n_shift = gaps_codon = gaps_prot = 0;
+
+  cod_i = a_res.min1 - 3;	/* every codon op steps forward before use */
+  prot_i = a_res.min0;
+
+  op_end = &a_res.res[a_res.nres];
+  for (op_p = a_res.res; op_p < op_end; op_p++) {
+    step = *op_p;
+
+    if (step == 0) {		/* residue only */
+      prot_i++;
+      gaps_codon++;
+      n_ops++;
+      continue;
+    }
+    if (step == 5) {		/* codon only */
+      cod_i += 3;
+      gaps_prot++;
+      n_ops++;
+      continue;
+    }
+    if (step < 2 || step > 4) {	/* not an alignment op */
+      continue;
+    }
+
+    /* residue paired with a codon; the op value is the codon step */
+    if (step != 3) {
+      n_shift++;
+    }
+    cod_i += step;
+    res = prot[prot_i++];
+    prot_ch = pst.sq[res];
+
+    if (step == 2) {
+      cod_ch = f_str->weight_c[res][codons[cod_i]].c2;
+    }
+    else if (step == 4) {
+      cod_ch = f_str->weight_c[res][codons[cod_i]].c4;
+    }
+    else {
+      cod_ch = f_str->weight_c[res][codons[cod_i]].c5;
+    }
+
+    if (pst.pam2[0][res][pascii[cod_ch]] >= 0) {
+      aln->nsim++;
+    }
+    if (cod_ch == prot_ch) {
+      aln->nident++;
+    }
+    n_ops++;
+  }
+
+#ifndef TFAST
+  aln->amax0 = cod_i + 3;	/* end of codon sequence */
+  aln->amax1 = prot_i;		/* end of protein sequence */
+  aln->ngap_q = gaps_codon;
+  aln->ngap_l = gaps_prot;
+#else
+  aln->amax1 = cod_i + 3;	/* end of codon sequence */
+  aln->amax0 = prot_i;		/* end of protein sequence */
+  aln->ngap_q = gaps_prot;
+  aln->ngap_l = gaps_codon;
+#endif
+  aln->nfs = n_shift;
+  aln->amin0 = aln->smin0;
+  aln->amin1 = aln->smin1;
+
+  return (n_ops < 0) ? 1 : n_ops;
+}
+
+#ifdef PCOMPLIB
+#include "p_mw.h"
+/* update_params - copy the n0 field of the query message qm_msg into the
+   parameter structure ppst.  Compiled only when PCOMPLIB is defined. */
+void
+update_params(struct qmng_str *qm_msg, struct pstruct *ppst)
+{
+ ppst->n0 = qm_msg->n0;
+}
+#endif