1 /* copyright (c) 1996 William R. Pearson */
3 /* $Name: fa_34_26_5 $ - $Id: dropgsw.c,v 1.80 2006/10/19 15:12:11 wrp Exp $ */
5 /* 17-Aug-2006 - removed globals *sapp/last - alignment should be thread safe */
7 /* 12-Oct-2005 - converted to use a_res and aln for alignment coordinates */
9 /* 4-Nov-2004 - Diagonal Altivec Smith-Waterman included */
11 /* 14-May-2003 - modified to return alignment start at 0, rather than
12 1, for begin:end alignments
14 25-Feb-2003 - modified to support Altivec parallel Smith-Waterman
16 22-Sep-2003 - removed Altivec support at request of Sencel lawyers
19 /* the do_walign() code in this file is not thread_safe */
20 /* init_work(), do_work(), are thread safe */
22 /* this code uses an implementation of the Smith-Waterman algorithm
23 designed by Phil Green, U. of Washington, that is 1.5 - 2X faster
24 than my Miller and Myers implementation. */
26 /* the shortcuts used in this program prevent it from calculating scores
27 that are less than the gap penalty for the first residue in a gap. As
28 a result this code cannot be used with very large gap penalties, or
29 with very short sequences, and probably should not be used with prss3.
32 /* version 3.2 fixes a subtle bug that was encountered while running
33 do_walign() interspersed with do_work(). This happens only with -m
34 9 and pvcomplib. The fix was to more explicitly zero-out ss[] at
35 the beginning of do_work.
47 static char *verstr="5.5 Sept 2006";
52 #include "drop_func.h"
55 #include "smith_waterman_altivec.h"
58 #include "smith_waterman_sse2.h"
61 struct swstr {int H, E;};
63 extern void init_karlin(const unsigned char *aa0, int n0, struct pstruct *ppst,
64 double *aa0_f, double **kp);
65 extern int do_karlin(const unsigned char *aa1, int n1,
66 int **pam2, struct pstruct *ppst,
67 double *aa0_f, double *kar_p, double *lambda, double *H);
70 ALIGN(const unsigned char *A, const unsigned char *B,
72 int **W, int IW, int G, int H, int *res, int *nres,
73 struct f_struct *f_str);
76 FLOCAL_ALIGN(const unsigned char *aa0, const unsigned char *aa1,
77 int n0, int n1, int low, int up,
78 int **W, int GG,int HH, int MW,
79 struct f_struct *f_str);
82 void DISPLAY(const unsigned char *A, const unsigned char *B,
84 int *S, int AP, int BP, char *sq);
86 extern void aancpy(char *to, char *from, int count, struct pstruct pst);
88 /* initialize for Smith-Waterman optimal score */
91 init_work (unsigned char *aa0, int n0,
93 struct f_struct **f_arg)
99 struct f_struct *f_str;
104 #if defined(SW_ALTIVEC) || defined(SW_SSE2)
114 if (ppst->ext_sq_set) {
115 nsq = ppst->nsqx; ip = 1;
118 nsq = ppst->nsq; ip = 0;
121 /* allocate space for function globals */
123 f_str = (struct f_struct *)calloc(1,sizeof(struct f_struct));
125 if(ppst->zsflag == 6 || ppst->zsflag == 16) {
127 init_karlin(aa0, n0, ppst, &f_str->aa0_f[0], &f_str->kar_p);
130 /* allocate space for the scoring arrays */
131 if ((ss = (struct swstr *) calloc (n0+2, sizeof (struct swstr)))
133 fprintf (stderr, "cannot allocate ss array %3d\n", n0);
138 ss[n0].H = -1; /* this is used as a sentinel - normally H >= 0 */
142 /* initialize variable (-S) pam matrix */
143 if ((f_str->waa_s= (int *)calloc((nsq+1)*(n0+1),sizeof(int))) == NULL) {
144 fprintf(stderr,"cannot allocate waa_s array %3d\n",nsq*n0);
148 /* initialize pam2p[1] pointers */
149 if ((f_str->pam2p[1]= (int **)calloc((n0+1),sizeof(int *))) == NULL) {
150 fprintf(stderr,"cannot allocate pam2p[1] array %3d\n",n0);
154 pam2p = f_str->pam2p[1];
155 if ((pam2p[0]=(int *)calloc((nsq+1)*(n0+1),sizeof(int))) == NULL) {
156 fprintf(stderr,"cannot allocate pam2p[1][] array %3d\n",nsq*n0);
160 for (i=1; i<n0; i++) {
161 pam2p[i]= pam2p[0] + (i*(nsq+1));
164 /* initialize universal (alignment) matrix */
165 if ((f_str->waa_a= (int *)calloc((nsq+1)*(n0+1),sizeof(int))) == NULL) {
166 fprintf(stderr,"cannot allocate waa_a struct %3d\n",nsq*n0);
170 /* initialize pam2p[0] pointers */
171 if ((f_str->pam2p[0]= (int **)calloc((n0+1),sizeof(int *))) == NULL) {
172 fprintf(stderr,"cannot allocate pam2p[1] array %3d\n",n0);
176 pam2p = f_str->pam2p[0];
177 if ((pam2p[0]=(int *)calloc((nsq+1)*(n0+1),sizeof(int))) == NULL) {
178 fprintf(stderr,"cannot allocate pam2p[1][] array %3d\n",nsq*n0);
182 for (i=1; i<n0; i++) {
183 pam2p[i]= pam2p[0] + (i*(nsq+1));
187 pwaa effectively has a sequence profile --
188 pwaa[0..n0-1] has pam score for residue 0 (-BIGNUM)
189 pwaa[n0..2n0-1] has pam scores for residue 1 (A)
190 pwaa[2n0..3n-1] has pam scores for residue 2 (R), ...
192 thus: pwaa = f_str->waa_s + (*aa1p++)*n0; sets up pwaa so that
193 *pwaa++ rapidly moves though the scores of the aa1p[] position
194 without further indexing
196 For a real sequence profile, pwaa[0..n0-1] vs ['A'] could have
197 a different score in each position.
200 if (ppst->pam_pssm) {
201 pwaa_s = f_str->waa_s;
202 pwaa_a = f_str->waa_a;
203 for (e = 0; e <=nsq; e++) { /* for each residue in the alphabet */
204 for (f = 0; f < n0; f++) { /* for each position in aa0 */
205 *pwaa_s++ = f_str->pam2p[ip][f][e] = ppst->pam2p[ip][f][e];
206 *pwaa_a++ = f_str->pam2p[0][f][e] = ppst->pam2p[0][f][e];
210 else { /* initialize scanning matrix */
211 pwaa_s = f_str->waa_s;
212 pwaa_a = f_str->waa_a;
213 for (e = 0; e <=nsq; e++) /* for each residue in the alphabet */
214 for (f = 0; f < n0; f++) { /* for each position in aa0 */
215 *pwaa_s++ = f_str->pam2p[ip][f][e]= ppst->pam2[ip][aa0[f]][e];
216 *pwaa_a++ = f_str->pam2p[0][f][e] = ppst->pam2[0][aa0[f]][e];
220 #if defined(SW_ALTIVEC)
222 /* First we allocate memory for the workspace - i.e. the single row
223 * of storage for H/F. Since this might be run on Linux or AIX too,
224 * we don't assume anything about the memory allocation but align
225 * it ourselves. We need two vectors (16 bytes each) per element,
226 * and some padding space to make it cache-line aligned.
228 * MAXTST+MAXLIB is longest allowed database sequence length...
229 * this should be m_msg.max_tot, but m_msg is not available, but
230 * ppst->maxlen has maxn, which is appropriate.
233 f_str->workspace_memory = (void *)malloc(2*16*(ppst->maxlen+SEQ_PAD)+256);
234 f_str->workspace = (void *) ((((size_t) f_str->workspace_memory) + 255) & (~0xff));
238 /* We always use a scoring profile in altivec, but the layout is a bit strange
239 * in order to optimize memory access order and thus cache efficiency.
240 * Normally we first try 8-bit scoring in altivec, and if this leads to overflow
241 * we recompute the score with 16-bit accuracy. Because of this we need to construct
242 * two score profiles.
243 * Since altivec always loads 16 bytes from aligned memory, corresponding to 8 or 16
244 * elements (for 16 and 8 bit scoring, respectively), we organize the scoring
245 * profile like this for 8-bit accuracy:
247 * 1. The profile starts on 256-byte aligned memory (cache line on G5 is 128 bytes).
248 * 2. First we have the score for the full alphabet for the first 16 residues of
249 * the query, i.e. positions 0-15 are the scores for the first 16 query letters
250 * vs. the first in the alphabet, positions 16-31 the scores for the same 16
251 * query positions against alphabet letter two, etc.
252 * 3. After alphabet_size*16bytes we start with the scores for residues 16-31 in
253 * the query, organized in the same way.
254 * 4. At the end of the query sequence, we pad the scoring to the next 16-tuple
255 * with neutral scores.
256 * 5. The total size of the profile is thus alphabet_size*N, where N is the
257 * size of the query rounded up to the next 16-tuple.
259 * The word (16-bit) profile is identical, but scores are stored as 8-tuples.
262 f_str->word_score_memory = (void *)malloc(10*2*(nsq+2)*(n0+1+16)+256);
263 f_str->byte_score_memory = (void *)malloc(10*(nsq+2)*(n0+1+16)+256);
265 f_str->word_score = (unsigned short *) ((((size_t) f_str->word_score_memory) + 255) & (~0xff));
266 f_str->byte_score = (unsigned char *) ((((size_t) f_str->byte_score_memory) + 255) & (~0xff));
270 if (ppst->pam_pssm) {
271 /* Use a position-specific scoring profile.
272 * This is essentially what we are going to construct anyway, but we'll
273 * reorder it to suit altivec.
276 for(i = 1; i <= nsq ; i++) {
277 for(j = 0; j < n0 ; j++) {
278 data = ppst->pam2p[ip][j][i];
279 if(data<bias) bias = data;
283 /* Fill our specially organized byte- and word-size scoring arrays. */
284 ps = f_str->word_score;
285 for(f = 0; f<n0 ; f+=8) {
287 for(i=0 ; i<8 ; i++) {
288 *ps++ = (unsigned short) 0;
290 /* for each chunk of 8 residues in our query */
291 for(e = 1; e<=nsq; e++) {
292 for(i=0 ; i<8 ; i++) {
295 data = ppst->pam2p[ip][l][e] - bias;
300 *ps++ = (unsigned short)data;
304 pc = f_str->byte_score;
305 for(f = 0; f<n0 ; f+=16) {
307 for(i=0 ; i<16 ; i++) {
308 *pc++ = (unsigned char)0;
311 for(e = 1; e<=nsq; e++) {
312 for(i=0 ; i<16 ; i++) {
315 data = ppst->pam2p[ip][l][e] - bias;
322 printf("Fatal error. data: %d bias: %d, position: %d/%d, Score out of range for 8-bit Altivec/VMX datatype.\n",data,bias,l,e);
327 *pc++ = (unsigned char)data;
333 /* Classical simple substitution matrix */
334 /* Find the bias to use in the substitution matrix */
336 for(i = 1; i <= nsq ; i++) {
337 for(j = 1; j <= nsq ; j++) {
338 data = ppst->pam2[ip][i][j];
339 if(data<bias) bias = data;
342 /* Fill our specially organized byte- and word-size scoring arrays. */
343 ps = f_str->word_score;
344 for(f = 0; f<n0 ; f+=8) {
346 for(i=0 ; i<8 ; i++) {
347 *ps++ = (unsigned short) 0;
349 /* for each chunk of 8 residues in our query */
350 for(e = 1; e<=nsq; e++) {
351 for(i=0 ; i<8 ; i++) {
354 data = ppst->pam2[ip][aa0[l]][e] - bias;
359 *ps++ = (unsigned short)data;
363 pc = f_str->byte_score;
364 for(f = 0; f<n0 ; f+=16) {
366 for(i=0 ; i<16 ; i++) {
367 *pc++ = (unsigned char)0;
370 for(e = 1; e<=nsq; e++) {
371 for(i=0 ; i<16 ; i++) {
374 data = ppst->pam2[ip][aa0[l]][e] - bias;
381 printf("Fatal error. Score out of range for 8-bit Altivec/VMX datatype.\n");
386 *pc++ = (unsigned char)data;
392 f_str->bias = (unsigned char) (-bias);
393 f_str->alphabet_size = nsq+1;
395 /* Some variable to keep track of how many 8-bit runs we need to rerun
396 * in 16-bit accuracy. If there are too many reruns it can be faster
397 * to use 16-bit alignments directly.
400 /* We can only do 8-bit alignments if the scores were small enough. */
401 if(overflow==0) f_str->try_8bit = 1;
402 else f_str->try_8bit = 0;
404 f_str->done_8bit = 0;
405 f_str->done_16bit = 0;
407 #endif /* SW_ALTIVEC */
410 /* First we allocate memory for the workspace - i.e. two rows for H and
411 * one row for F. We also need enough space to hold a temporary
412 * scoring profile which will be query_length * 16 (sse2 word length).
413 * Since this might be run on Linux or AIX too, we don't assume
414 * anything about the memory allocation but align it ourselves.
416 f_str->workspace_memory = (void *)malloc(3*16*(MAXTST+MAXLIB+32)+256);
417 f_str->workspace = (void *) ((((size_t) f_str->workspace_memory) + 255) & (~0xff));
419 /* We always use a scoring profile for the SSE2 implementation, but the layout
420 * is a bit strange. The scoring profile is parallel to the query, but is
421 * accessed in a stripped pattern. The query is divided into equal length
422 * segments. The number of segments is equal to the number of elements
423 * processed in the SSE2 register. For 8-bit calculations, the query will
424 * be divided into 16 equal length parts. If the query is not long enough
425 * to fill the last segment, it will be filled with neutral weights. The
426 * first element in the SSE register will hold a value from the first segment,
427 * the second element of the SSE register will hold a value from the
428 * second segment and so on. So if the query length is 288, then each
429 * segment will have a length of 18. So the first 16 bytes will have
430 * the following weights: Q1, Q19, Q37, ... Q271; the next 16 bytes will
431 * have the following weights: Q2, Q20, Q38, ... Q272; and so on until
432 * all parts of all segments have been written. The last seqment will
433 * have the following weights: Q18, Q36, Q54, ... Q288. This will be
434 * done for the entire alphabet.
437 f_str->word_score_memory = (void *)malloc((n0 + 32) * sizeof (short) * (nsq + 1) + 256);
438 f_str->byte_score_memory = (void *)malloc((n0 + 32) * sizeof (char) * (nsq + 1) + 256);
440 f_str->word_score = (unsigned short *) ((((size_t) f_str->word_score_memory) + 255) & (~0xff));
441 f_str->byte_score = (unsigned char *) ((((size_t) f_str->byte_score_memory) + 255) & (~0xff));
445 if (ppst->pam_pssm) {
446 /* Use a position-specific scoring profile.
447 * This is essentially what we are going to construct anyway, but we'll
448 * reorder it to suit sse2.
451 for (i = 1; i <= nsq ; i++) {
452 for (j = 0; j < n0 ; j++) {
453 data = ppst->pam2p[ip][j][i];
460 /* Fill our specially organized byte- and word-size scoring arrays. */
461 ps = f_str->word_score;
462 col_len = (n0 + 7) / 8;
463 n_count = (n0 + 7) & 0xfffffff8;
464 for (f = 0; f < n_count; ++f) {
467 for (f = 1; f <= nsq ; f++) {
468 for (e = 0; e < col_len; e++) {
469 for (i = e; i < n_count; i += col_len) {
470 if ( i < n0) { data = ppst->pam2p[ip][i][f];}
472 *ps++ = (unsigned short)data;
476 pc = f_str->byte_score;
477 col_len = (n0 + 15) / 16;
478 n_count = (n0 + 15) & 0xfffffff0;
479 for (f = 0; f < n_count; ++f) {
482 for (f = 1; f <= nsq ; f++) {
483 for (e = 0; e < col_len; e++) {
484 for (i = e; i < n_count; i += col_len) {
485 if ( i < n0 ) { data = ppst->pam2p[ip][i][f] - bias;}
486 else {data = 0 - bias;}
488 printf("Fatal error. data: %d bias: %d, position: %d/%d, "
489 "Score out of range for 8-bit SSE2 datatype.\n",
493 *pc++ = (unsigned char)data;
500 /* Classical simple substitution matrix */
501 /* Find the bias to use in the substitution matrix */
503 for (i = 1; i <= nsq ; i++) {
504 for (j = 1; j <= nsq ; j++) {
505 data = ppst->pam2[ip][i][j];
512 /* Fill our specially organized byte- and word-size scoring arrays. */
513 ps = f_str->word_score;
514 col_len = (n0 + 7) / 8;
515 n_count = (n0 + 7) & 0xfffffff8;
516 for (f = 0; f < n_count; ++f) {
519 for (f = 1; f <= nsq ; f++) {
520 for (e = 0; e < col_len; e++) {
521 for (i = e; i < n_count; i += col_len) {
525 data = ppst->pam2[ip][aa0[i]][f];
527 *ps++ = (unsigned short)data;
532 pc = f_str->byte_score;
533 col_len = (n0 + 15) / 16;
534 n_count = (n0 + 15) & 0xfffffff0;
535 for (f = 0; f < n_count; ++f) {
538 for (f = 1; f <= nsq ; f++) {
539 for (e = 0; e < col_len; e++) {
540 for (i = e; i < n_count; i += col_len) {
544 data = ppst->pam2[ip][aa0[i]][f] - bias;
547 printf("Fatal error. data: %d bias: %d, position: %d/%d, "
548 "Score out of range for 8-bit SSE2 datatype.\n",
552 *pc++ = (unsigned char)data;
558 f_str->bias = (unsigned char) (-bias);
559 f_str->alphabet_size = nsq+1;
561 /* Some variable to keep track of how many 8-bit runs we need to rerun
562 * in 16-bit accuracy. If there are too many reruns it can be faster
563 * to use 16-bit alignments directly.
566 /* We can only do 8-bit alignments if the scores were small enough. */
567 f_str->try_8bit = (overflow == 0) ? 1 : 0;
569 f_str->done_8bit = 0;
570 f_str->done_16bit = 0;
573 /* these structures are used for producing alignments */
575 maxn0 = max(3*n0/2,MIN_RES); /* minimum allocation for alignment */
576 if ((res = (int *)calloc((size_t)maxn0,sizeof(int)))==NULL) {
577 fprintf(stderr,"cannot allocate alignment results array %d\n",maxn0);
586 void close_work (const unsigned char *aa0, int n0,
587 struct pstruct *ppst,
588 struct f_struct **f_arg)
590 struct f_struct *f_str;
595 if (f_str->kar_p !=NULL) free(f_str->kar_p);
600 free(f_str->pam2p[0][0]);
601 free(f_str->pam2p[0]);
603 free(f_str->pam2p[1][0]);
604 free(f_str->pam2p[1]);
606 #if defined(SW_ALTIVEC) || defined(SW_SSE2)
607 free(f_str->workspace_memory);
608 free(f_str->word_score_memory);
609 free(f_str->byte_score_memory);
617 /* pstring1 is a message to the manager, currently 512 */
618 /*void get_param(struct pstruct *pstr,char *pstring1)*/
619 void get_param (struct pstruct *pstr, char *pstring1, char *pstring2)
624 #if defined(SW_ALTIVEC)
625 strncpy(pg_str,"Smith-Waterman (Altivec/VMX, Erik Lindahl 2004)",sizeof(pg_str));
628 strncpy(pg_str,"Smith-Waterman (SSE2, Michael Farrar 2006)",sizeof(pg_str));
630 #if !defined(SW_ALTIVEC) && !defined(SW_SSE2)
631 strncpy(pg_str,"Smith-Waterman (PGopt)",sizeof(pg_str));
634 if (pstr->pam_pssm) { strncpy(psi_str,"-PSI",sizeof(psi_str));}
635 else { psi_str[0]='\0';}
638 sprintf (pstring1, " %s (%s) function [%s matrix%s (%d:%d)%s], gap-penalty: %d/%d",
640 sprintf (pstring1, " %s (%s) function [%s matrix%s (%d:%d)%s], open/ext: %d/%d",
642 pg_str, verstr, pstr->pamfile, psi_str, pstr->pam_h,pstr->pam_l,
643 (pstr->ext_sq_set)?"xS":"\0", pstr->gdelval, pstr->ggapval);
645 if (pstr->zsflag==0) strcat(pstring1," not-scaled\n");
646 else if (pstr->zsflag==1) strcat(pstring1," reg.-scaled");
648 if (pstring2 != NULL) {
650 sprintf(pstring2,"; pg_name: %s\n; pg_ver: %s\n; pg_matrix: %s (%d:%d)%s\n; pg_gap-pen: %d %d\n",
652 sprintf(pstring2,"; pg_name: %s\n; pg_ver: %s\n; pg_matrix: %s (%d:%d)%s\n; pg_open-ext: %d %d\n",
654 pg_str,verstr,psi_str,pstr->pam_h,pstr->pam_l,
655 (pstr->ext_sq_set)?"xS":"\0",pstr->gdelval,pstr->ggapval);
659 void do_work (const unsigned char *aa0, int n0,
660 const unsigned char *aa1, int n1,
662 struct pstruct *ppst, struct f_struct *f_str,
663 int qr_flg, struct rstruct *rst)
672 score = smith_waterman_altivec_byte(aa0,
678 #ifndef OLD_FASTA_GAP
679 -(ppst->gdelval + ppst->ggapval),
690 /* Overflow, so we have to redo it in 16 bits. */
691 score = smith_waterman_altivec_word(aa0,
697 #ifndef OLD_FASTA_GAP
698 -(ppst->gdelval + ppst->ggapval),
705 /* The 8 bit version is roughly 50% faster than the 16 bit version,
706 * so we are fine if less than about 1/3 of the runs have to
707 * be rerun with 16 bits. If it is more, and we have tried at least
708 * 500 sequences, we switch off the 8-bit mode.
711 if(f_str->done_8bit>500 && (3*f_str->done_16bit)>(f_str->done_8bit))
717 /* Just use the 16-bit altivec version directly */
718 score = smith_waterman_altivec_word(aa0,
724 #ifndef OLD_FASTA_GAP
725 -(ppst->gdelval + ppst->ggapval),
733 #endif /* not Altivec */
739 score = smith_waterman_sse2_byte(aa0,
745 #ifndef OLD_FASTA_GAP
746 -(ppst->gdelval + ppst->ggapval),
757 /* Overflow, so we have to redo it in 16 bits. */
758 score = smith_waterman_sse2_word(aa0,
763 #ifndef OLD_FASTA_GAP
764 -(ppst->gdelval + ppst->ggapval),
771 /* The 8 bit version is roughly 50% faster than the 16 bit version,
772 * so we are fine if less than about 1/3 of the runs have to
773 * be rerun with 16 bits. If it is more, and we have tried at least
774 * 500 sequences, we switch off the 8-bit mode.
777 if(f_str->done_8bit>500 && (3*f_str->done_16bit)>(f_str->done_8bit))
783 /* Just use the 16-bit altivec version directly */
784 score = smith_waterman_sse2_word(aa0,
789 #ifndef OLD_FASTA_GAP
790 -(ppst->gdelval + ppst->ggapval),
799 #if !defined(SW_ALTIVEC) && !defined(SW_SSE2)
801 score = FLOCAL_ALIGN(aa0,aa1,n0,n1,0,0,
803 #ifndef OLD_FASTA_GAP
804 -(ppst->gdelval + ppst->ggapval),
808 ppst->ggapval,0,f_str);
811 rst->score[0] = score;
813 if(( ppst->zsflag == 6 || ppst->zsflag == 16) &&
814 (do_karlin(aa1, n1, ppst->pam2[0], ppst,f_str->aa0_f,
815 f_str->kar_p, &lambda, &H)>0)) {
816 rst->comp = 1.0/lambda;
819 else {rst->comp = rst->H = -1.0;}
824 FLOCAL_ALIGN(const unsigned char *aa0, const unsigned char *aa1,
825 int n0, int n1, int low, int up,
826 int **W, int GG,int HH, int MW,
827 struct f_struct *f_str) {
830 register struct swstr *ssj;
832 register int h, e, f, p;
834 int gap_ext, n_gap_init;
836 const unsigned char *aa1p;
845 for (h=0; h<n0; h++) { /* initialize 0th row */
846 ss[h].H = ss[h].E = 0;
850 while (*aa1p) { /* relies on aa1[n1]==0 for EOS flag */
851 /* waa_s has the offsets for each residue in aa0 into pam2 */
852 /* waa_s has complexity (-S) dependent scores */
853 pwaa = f_str->waa_s + (*aa1p++)*n0;
857 zero_f: /* in this section left-gap f==0, and is never examined */
859 while (1) { /* build until h > n_gap_init (f < 0 until h > n_gap_init) */
860 /* bump through the pam[][]'s for each of the aa1[] matches to
861 aa0[], because of the way *pwaa is set up */
863 h = p + *pwaa++; /* increment diag value */
864 p = ssj->H; /* get next diag value */
865 if ((e = ssj->E) > 0 ) { /* >0 from up-gap */
866 if (p == -1) goto next_row; /* done, -1=ss[n0].H sentinel */
867 if (h < e) h = e; /* up-gap better than diag */
869 if (h > n_gap_init) { /* we won't starting a new up-gap */
870 e += gap_ext; /* but we might be extending one */
871 goto transition; /* good h > n_gap_diag; scan f */
873 e += gap_ext; /* up-gap decreased */
874 ssj->E = (e > 0) ? e : 0; /* set to 0 if < 0 */
875 ssj++->H = h; /* diag match updated */
877 else { /* up-gap (->E) is 0 */
878 if ( h > 0) { /* diag > 0 */
879 if (h > n_gap_init) { /* we won't be starting a new up-gap */
880 e = 0; /* and we won't be extending one */
881 goto transition; /* good h > n_gap_diag; scan f */
883 ssj++->H = h; /* update diag */
885 else ssj++->H = 0; /* update diag to 0 */
889 /* here h > n_gap_init and h > e, => the next f will be > 0 */
893 fprintf(stderr,"h: %d ssj: %d\n",h, (int)(ssj-ss));
895 if ( score < h ) score = h; /* save best score, only when h > n_gap_init */
897 temp = h - n_gap_init; /* best score for starting a new gap */
898 if ( f < temp ) f = temp; /* start a left-gap? */
899 if ( e < temp ) e = temp; /* start an up-gap? */
900 ssj->E = ( e > 0 ) ? e : 0; /* update up-gap */
901 ssj++->H = h; /* update diag */
904 do { /* stay here until f <= 0 */
905 h = p + *pwaa++; /* diag + match/mismatch */
906 p = ssj->H; /* save next (right) diag */
908 if ( h < f ) h = f; /* update diag using left gap */
909 f += gap_ext; /* update next left-gap */
911 if ((e = ssj->E) > 0) { /* good up gap */
912 if (p == -1) goto next_row; /* at the end of the row */
913 if ( h < e ) h = e; /* update diag using up-gap */
915 if ( h > n_gap_init ) {
916 e += gap_ext; /* update up gap */
917 goto transition; /* good diag > n_gap_init, restart */
919 e += gap_ext; /* update up-gap */
920 ssj->E = (e > 0) ? e : 0; /* e must be >= 0 */
921 ssj++->H = h; /* update diag */
923 else { /* up-gap <= 0 */
924 if ( h > n_gap_init ) {
926 goto transition; /* good diag > n_gap_init; restart */
928 ssj++->H = h; /* update diag */
930 } while ( f > 0 ); /* while left gap f > 0 */
931 goto zero_f; /* otherwise, go to f==0 section */
934 } /* end while(*aap1) {} */
938 } /* here we should be all done */
940 void do_opt (const unsigned char *aa0, int n0,
941 const unsigned char *aa1, int n1,
943 struct pstruct *ppst, struct f_struct *f_str,
948 int do_walign (const unsigned char *aa0, int n0,
949 const unsigned char *aa1, int n1,
951 struct pstruct *ppst,
952 struct f_struct *f_str,
953 struct a_res_str *a_res,
956 const unsigned char *aa0p, *aa1p;
959 register struct swstr *ssj;
965 int cost, I, J, K, L;
970 waa = f_str->waa_a; /* this time use universal pam2[0] */
974 q = -(ppst->gdelval - ppst->ggapval);
982 /* initialize 0th row */
983 for (ssj=ss; ssj<ss+n0; ssj++) {
994 pwaa = waa + (*aa1p++ * n0);
995 for (ssj = ss, aa0p = aa0; ssj < ss+n0; ssj++) {
996 if ((h = h - m) > /* gap open from left best */
997 /* gap extend from left gapped */
998 (f = f - r)) f = h; /* if better, use new gap opened */
999 if ((h = ssj->H - m) > /* gap open from up best */
1000 /* gap extend from up gap */
1001 (e = ssj->E - r)) e = h; /* if better, use new gap opened */
1002 h = p + *pwaa++; /* diagonal match */
1003 if (h < 0 ) h = 0; /* ? < 0, reset to 0 */
1004 if (h < f ) h = f; /* left gap better, reset */
1005 if (h < e ) h = e; /* up gap better, reset */
1006 p = ssj->H; /* save previous best score */
1007 ssj->H = h; /* save (new) up diag-matched */
1008 ssj->E = e; /* save upper gap opened */
1009 if (h > score) { /* ? new best score */
1010 score = h; /* save best */
1012 J = (int)(ssj-ss); /* column */
1016 } /* done with forward pass */
1017 if (score <= 0) return 0;
1019 /* to get the start point, go backwards */
1021 /* 18-June-2003 fix bug in backtracking code to identify start of
1022 alignment. Code used pam2[0][aa0[j]][aa1[i]] instead of
1023 pam2p[0][j][aa1[i]]. Ideally, it would use waa_a.
1027 for (ssj=ss+J; ssj>=ss; ssj--) ssj->H= ssj->E= -1;
1029 for (i=I; i>=0; i--) {
1031 p = (i == I) ? 0 : -1;
1032 for (ssj=ss+J, j= J; ssj>=ss; ssj--,j--) {
1034 ssj->E=max(ssj->E,ssj->H-q)-r;
1035 h = max(max(ssj->E,f),p+f_str->pam2p[0][j][aa1[i]]);
1042 if (cost >= score) goto found;
1049 /* printf(" %d: L: %3d-%3d/%3d; K: %3d-%3d/%3d\n",score,L,J,n0,K,I,n1); */
1051 /* in the f_str version, the *res array is already allocated at 4*n0/3 */
1053 a_res->res = f_str->res;
1055 a_res->max0 = J+1; a_res->min0 = L; a_res->max1 = I+1; a_res->min1 = K;
1057 /* ALIGN(&aa1[K-1],&aa0[L-1],I-K+1,J-L+1,ppst->pam2[0],q,r,res,nres,f_str); */
1060 /* this code no longer refers to aa0[], it uses pam2p[0][L] instead */
1061 ALIGN(&aa0[L-1],&aa1[K-1],J-L+1,I-K+1,f_str->pam2p[0],L,q,r,
1062 a_res->res,&a_res->nres,f_str);
1064 /* DISPLAY(&aa0[L-1],&aa1[K-1],J-L+1,I-K+1,res,L,K,ppst->sq); */
1066 /* return *res and nres */
1071 static int CHECK_SCORE(const unsigned char *A, const unsigned char *B,
1073 int *S, int **W, int IW, int G, int H, int *nres);
1075 #define gap(k) ((k) <= 0 ? 0 : g+h*(k)) /* k-symbol indel cost */
1077 /* Append "Delete k" op */
1080 *last = (*sapp)[-1] -= (k); \
1082 *last = (*sapp)[0] = -(k); \
1087 /* Append "Insert k" op */
1090 *last = (*sapp)[-1] += (k); \
1092 *last = (*sapp)[0] = (k); \
1102 print_seq_prof(unsigned char *A, int M,
1103 unsigned char *B, int N,
1104 int **w, int iw, int dir) {
1106 int i_max, j_max, i,j;
1110 for (i=1; i<=min(60,M); i++) {
1111 fprintf(stderr,"%c",aa[A[i]]);
1113 fprintf(stderr, - %d\n,M);
1115 for (i=0; i<min(60,M); i++) {
1117 for (j=1; j<21; j++) {
1118 if (w[iw+i][j]> i_max) {
1123 fprintf(stderr,"%c",aa[j_max]);
1127 for (i=1; i<=min(60,N); i++) {
1128 fprintf(stderr,"%c",aa[B[i]]);
1131 fprintf(stderr," -%c: %d,%d\n",c_dir[dir],M,N);
1135 /* align(A,B,M,N,tb,te,last) returns the cost of an optimum conversion between
1136 A[1..M] and B[1..N] that begins(ends) with a delete if tb(te) is zero
1137 and appends such a conversion to the current script. */
1140 align(const unsigned char *A, const unsigned char *B,
1142 int tb, int te, int **w, int iw, int g, int h,
1143 struct f_struct *f_str, int dir,
1144 int **sapp, int *last)
1147 int midi, midj, type; /* Midpoint, type, and cost */
1152 register int c, e, d, s;
1154 struct swstr *f_ss, *r_ss;
1156 /* print_seq_prof(A,M,B,N,w,iw,dir); */
1163 /* Boundary cases: M <= 1 or N == 0 */
1176 if (tb < te) tb = te;
1177 midc = (tb-h) - gap(N);
1181 for (j = 1; j <= N; j++) {
1182 c = -gap(j-1) + wa[B[j]] - gap(N-j);
1183 if (c > midc) { midc = c; midj = j;}
1185 if (midj == 0) { DEL(1) INS(N) }
1187 if (midj > 1) { INS(midj-1)}
1188 *last = (*sapp)[0] = 0;
1190 if (midj < N) { INS(N-midj)}
1195 /* Divide: Find optimum midpoint (midi,midj) of cost midc */
1197 midi = M/2; /* Forward phase: */
1198 f_ss[0].H = 0; /* Compute H(M/2,k) & E(M/2,k) for all k */
1200 for (j = 1; j <= N; j++) {
1201 f_ss[j].H = t = t-h;
1205 for (i = 1; i <= midi; i++) {
1207 f_ss[0].H = c = t = t-h;
1211 for (j = 1; j <= N; j++) {
1212 if ((c = c - m) > (e = e - h)) e = c;
1213 if ((c = f_ss[j].H - m) > (d = f_ss[j].E - h)) d = c;
1222 f_ss[0].E = f_ss[0].H;
1224 r_ss[N].H = 0; /* Reverse phase: */
1225 t = -g; /* Compute R(M/2,k) & S(M/2,k) for all k */
1227 for (j = N-1; j >= 0; j--) {
1228 r_ss[j].H = t = t-h;
1233 for (i = M-1; i >= midi; i--) {
1235 r_ss[N].H = c = t = t-h;
1237 /* wa = w[A[i+1]]; */
1239 for (j = N-1; j >= 0; j--) {
1240 if ((c = c - m) > (e = e - h)) { e = c; }
1241 if ((c = r_ss[j].H - m) > (d = r_ss[j].E - h)) { d = c; }
1250 r_ss[N].E = r_ss[N].H;
1252 midc = f_ss[0].H+r_ss[0].H; /* Find optimal midpoint */
1256 for (j = 0; j <= N; j++) {
1257 if ((c = f_ss[j].H + r_ss[j].H) >= midc) {
1258 if (c > midc || (f_ss[j].H != f_ss[j].E && r_ss[j].H == r_ss[j].E)) {
1265 for (j = N; j >= 0; j--) {
1266 if ((c = f_ss[j].E + r_ss[j].E + g) > midc) {
1273 /* Conquer: recursively around midpoint */
1276 { c1 = align(A,B,midi,midj,tb,-g,w,iw,g,h,f_str,0,sapp,last);
1277 c2 = align(A+midi,B+midj,M-midi,N-midj,-g,te,w,iw+midi,g,h,f_str,1,sapp,last);
1280 { align(A,B,midi-1,midj,tb,0,w,iw,g,h,f_str,2,sapp,last);
1282 align(A+midi+1,B+midj,M-midi-1,N-midj,0,te,w,iw+midi+1,g,h,f_str,3,sapp,last);
1287 /* Interface and top level of comparator */
1290 ALIGN(const unsigned char *A, const unsigned char *B,
1292 int **W, int IW, int G, int H, int *S, int *NC,
1293 struct f_struct *f_str)
1295 struct swstr *f_ss, *r_ss;
1302 if ((f_ss = (struct swstr *) calloc (N+2, sizeof (struct swstr)))
1304 fprintf (stderr, "cannot allocate f_ss array %3d\n", N+2);
1310 if ((r_ss = (struct swstr *) calloc (N+2, sizeof (struct swstr)))
1312 fprintf (stderr, "cannot allocate r_ss array %3d\n", N+2);
1318 /* print_seq_prof(A,M,W,IW); */
1319 c = align(A,B,M,N,-G,-G,W,IW,G,H,f_str,0,&sapp,&last); /* OK, do it */
1321 ck = CHECK_SCORE(A,B,M,N,S,W,IW,G,H,NC);
1323 fprintf(stdout,"*** Check_score error. %d != %d ***\n",c,ck);
1324 fprintf(stderr,"*** Check_score error. %d != %d ***\n",c,ck);
1328 free(r_ss); free(f_ss);
1333 /* Alignment display routine */
1336 DISPLAY(const unsigned char *A, const unsigned char *B,
1338 int *S, int AP, int BP, char *sq)
1339 { register char *a, *b, *c;
1340 register int i, j, op;
1343 char ALINE[51], BLINE[51], CLINE[51];
1345 i = j = op = lines = 0;
1351 while (i < M || j < N)
1352 { if (op == 0 && *S == 0)
1356 *c++ = (*a++ == *b++) ? '|' : ' ';
1367 { *a++ = sq[A[++i]];
1373 if (a >= ALINE+50 || (i >= M && j >= N))
1374 { *a = *b = *c = '\0';
1375 printf("\n%5d ",50*lines++);
1376 for (b = ALINE+10; b <= a; b += 10)
1380 printf("\n%5d %s\n %s\n%5d %s\n",ap,ALINE,CLINE,bp,BLINE);
1390 /* CHECK_SCORE - return the score of the alignment stored in S */
1392 static int CHECK_SCORE(const unsigned char *A, const unsigned char *B,
1394 int *S, int **w, int iw,
1395 int g, int h, int *NC)
1397 register int i, j, op, nc;
1400 /* print_seq_prof(A,M,w,iw); */
1402 score = i = j = op = nc = 0;
1403 while (i < M || j < N) {
1406 score = w[iw+i][B[++j]] + score;
1411 score = score - (g+op*h);
1415 score = score - (g-op*h);
1425 pre_cons(const unsigned char *aa1, int n1, int frame, struct f_struct *f_str) {
1428 f_str->n10 = aatran(aa1,f_str->aa1x,n1,frame);
1433 /* aln_func_vals - set up aln.qlfact, qlrev, llfact, llmult, frame, llrev */
1434 /* call from calcons, calc_id, calc_code */
1436 aln_func_vals(int frame, struct a_struct *aln) {
1438 aln->llfact = aln->llmult = aln->qlfact = 1;
1439 aln->qlrev = aln->llrev = 0;
1443 /* 29-June-2003 this version has been modified to use pst.pam2p
1444 instead of pam2 to indicate similarity */
1448 int calcons(const unsigned char *aa0, int n0,
1449 const unsigned char *aa1, int n1,
1450 int *nc, struct a_struct *aln,
1451 struct a_res_str a_res,
1453 char *seqc0, char *seqc1, char *seqca,
1454 struct f_struct *f_str)
1457 int op, lenc, nd, ns, itmp;
1458 char *sp0, *sp1, *spa, *sq;
1462 if (pst.ext_sq_set) { sq = pst.sqx; }
1463 else { sq = pst.sq; }
1465 aln->amin0 = a_res.min0;
1466 aln->amax0 = a_res.max0;
1467 aln->amin1 = a_res.min1;
1468 aln->amax1 = a_res.max1;
1470 /* first fill in the ends */
1472 if (min(a_res.min0,a_res.min1)<aln->llen || aln->showall==1) /* will we show all the start ?*/
1473 if (a_res.min0>=a_res.min1) { /* aa0 extends more to left */
1475 if (aln->showall==1) mins=a_res.min0;
1476 else mins = min(a_res.min0,aln->llcntx);
1477 aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst);
1478 aln->smin0 = a_res.min0-mins;
1479 if ((mins-a_res.min1)>0) {
1480 memset(seqc1,' ',mins-a_res.min1);
1481 aancpy(seqc1+mins-a_res.min1,(char *)aa1,a_res.min1,pst);
1485 aancpy(seqc1,(char *)aa1+a_res.min1-mins,mins,pst);
1486 aln->smin1 = a_res.min1-mins;
1491 if (aln->showall == 1) mins=a_res.min1;
1492 else mins = min(a_res.min1,aln->llcntx);
1493 aancpy(seqc1,(char *)(aa1+a_res.min1-mins),mins,pst);
1494 aln->smin1 = a_res.min1-mins;
1495 if ((mins-a_res.min0)>0) {
1496 memset(seqc0,' ',mins-a_res.min0);
1497 aancpy(seqc0+mins-a_res.min0,(char *)aa0,a_res.min0,pst);
1501 aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst);
1502 aln->smin0 = a_res.min0-mins;
1505 else { /* we are not showing the start */
1506 /* mins has the amount of unaligned context to be shown */
1507 mins= min(aln->llcntx,min(a_res.min0,a_res.min1));
1510 aln->smin0=a_res.min0 - mins;
1511 aln->smin1=a_res.min1 - mins;
1513 aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst);
1514 aancpy(seqc1,(char *)aa1+a_res.min1-mins,mins,pst);
1517 /* now get the middle */
1519 memset(seqca,M_BLANK,mins);
1525 lenc = aln->nident = aln->nsim = aln->ngap_q = aln->ngap_l = aln->nfs =op = 0;
1529 while (i0 < a_res.max0 || i1 < a_res.max1) {
1530 if (op == 0 && *rp == 0) {
1533 if ((itmp=f_str->pam2p[0][i0][aa1[i1]])<0) { *spa = M_NEG; }
1534 else if (itmp == 0) { *spa = M_ZERO;}
1535 else {*spa = M_POS;}
1536 if (*spa == M_POS || *spa==M_ZERO) aln->nsim++;
1538 *sp0 = sq[aa0[i0++]];
1539 *sp1 = sq[aa1[i1++]];
1541 if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}
1542 else if (pst.nt_align && ((*sp0 == 'T' && *sp1 == 'U') ||
1543 (*sp0=='U' && *sp1=='T'))) {
1544 aln->nident++; *spa=M_IDENT;
1547 sp0++; sp1++; spa++;
1550 if (op==0) op = *rp++;
1553 *sp1++ = sq[aa1[i1++]];
1560 *sp0++ = sq[aa0[i0++]];
1572 /* now we have the middle, get the right end */
1575 /* how much extra to show at end ? */
1576 if (!aln->llcntx_flg) {
1577 ns = mins + lenc + aln->llen; /* show an extra line? */
1578 ns -= (itmp = ns %aln->llen); /* itmp = left over on last line */
1579 if (itmp>aln->llen/2) ns += aln->llen; /* more than 1/2 , use another*/
1580 nd = ns - (mins+lenc); /* this much extra */
1582 else nd = aln->llcntx;
1584 if (nd > max(n0-a_res.max0,n1-a_res.max1))
1585 nd = max(n0-a_res.max0,n1-a_res.max1);
1587 if (aln->showall==1) {
1588 nd = max(n0-a_res.max0,n1-a_res.max1); /* reset for showall=1 */
1590 aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,n0-a_res.max0,pst);
1591 aancpy(seqc1+mins+lenc,(char *)aa1+a_res.max1,n1-a_res.max1,pst);
1592 /* fill with blanks - this is required to use one 'nc' */
1593 memset(seqc0+mins+lenc+n0-a_res.max0,' ',nd-(n0-a_res.max0));
1594 memset(seqc1+mins+lenc+n1-a_res.max1,' ',nd-(n1-a_res.max1));
1597 if ((nd-(n0-a_res.max0))>0) {
1598 aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,n0-a_res.max0,pst);
1599 memset(seqc0+mins+lenc+n0-a_res.max0,' ',nd-(n0-a_res.max0));
1601 else aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,nd,pst);
1603 if ((nd-(n1-a_res.max1))>0) {
1604 aancpy(seqc1+mins+lenc,(char *)aa1+a_res.max1,n1-a_res.max1,pst);
1605 memset(seqc1+mins+lenc+n1-a_res.max1,' ',nd-(n1-a_res.max1));
1607 else aancpy(seqc1+mins+lenc,(char *)aa1+a_res.max1,nd,pst);
1614 return mins+lenc+nd;
1617 int calcons_a(const unsigned char *aa0, unsigned char *aa0a, int n0,
1618 const unsigned char *aa1, int n1,
1620 struct a_struct *aln,
1621 struct a_res_str a_res,
1623 char *seqc0, char *seqc0a, char *seqc1, char *seqca,
1624 char *ann_arr, struct f_struct *f_str)
1627 int op, lenc, nd, ns, itmp;
1628 char *sp0, *sp0a, *sp1, *spa, *sq;
1632 if (pst.ext_sq_set) {
1639 aln->amin0 = a_res.min0;
1640 aln->amax0 = a_res.max0;
1641 aln->amin1 = a_res.min1;
1642 aln->amax1 = a_res.max1;
1644 /* first fill in the ends */
1646 if (min(a_res.min0,a_res.min1)<aln->llen || aln->showall==1) /* will we show all the start ?*/
1647 if (a_res.min0>=a_res.min1) { /* aa0 extends more to left */
1649 if (aln->showall==1) mins=a_res.min0;
1650 else mins = min(a_res.min0,aln->llcntx);
1651 aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst);
1652 aln->smin0 = a_res.min0-mins;
1653 if ((mins-a_res.min1)>0) {
1654 memset(seqc1,' ',mins-a_res.min1);
1655 aancpy(seqc1+mins-a_res.min1,(char *)aa1,a_res.min1,pst);
1659 aancpy(seqc1,(char *)aa1+a_res.min1-mins,mins,pst);
1660 aln->smin1 = a_res.min1-mins;
1665 if (aln->showall == 1) mins=a_res.min1;
1666 else mins = min(a_res.min1,aln->llcntx);
1667 aancpy(seqc1,(char *)(aa1+a_res.min1-mins),mins,pst);
1668 aln->smin1 = a_res.min1-mins;
1669 if ((mins-a_res.min0)>0) {
1670 memset(seqc0,' ',mins-a_res.min0);
1671 aancpy(seqc0+mins-a_res.min0,(char *)aa0,a_res.min0,pst);
1675 aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst);
1676 aln->smin0 = a_res.min0-mins;
1680 mins= min(aln->llcntx,min(a_res.min0,a_res.min1));
1682 aln->smin0=a_res.min0 - smins;
1683 aln->smin1=a_res.min1 - smins;
1684 aancpy(seqc0,(char *)aa0+a_res.min0-mins,mins,pst);
1685 aancpy(seqc1,(char *)aa1+a_res.min1-mins,mins,pst);
1688 /* now get the middle */
1690 memset(seqca,M_BLANK,mins);
1691 memset(seqc0a,' ',mins);
1698 lenc = aln->nident = aln->nsim = aln->ngap_q = aln->ngap_l = aln->nfs =op = 0;
1702 while (i0 < a_res.max0 || i1 < a_res.max1) {
1703 if (op == 0 && *rp == 0) {
1706 if ((itmp=f_str->pam2p[0][i0][aa1[i1]])<0) { *spa = M_NEG; }
1707 else if (itmp == 0) { *spa = M_ZERO;}
1708 else {*spa = M_POS;}
1709 if (*spa == M_POS || *spa==M_ZERO) aln->nsim++;
1711 *sp0a++ = ann_arr[aa0a[i0]];
1712 *sp0 = sq[aa0[i0++]];
1713 *sp1 = sq[aa1[i1++]];
1715 if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;}
1716 else if (pst.nt_align && ((*sp0 == 'T' && *sp1 == 'U') ||
1717 (*sp0=='U' && *sp1=='T'))) {
1718 aln->nident++; *spa=M_IDENT;
1721 sp0++; sp1++; spa++;
1724 if (op==0) op = *rp++;
1727 *sp1++ = sq[aa1[i1++]];
1735 *sp0a++ = ann_arr[aa0a[i0]];
1736 *sp0++ = sq[aa0[i0++]];
1747 *sp0a = *spa = '\0';
1748 /* now we have the middle, get the right end */
1750 /* how much extra to show at end ? */
1751 if (!aln->llcntx_flg) {
1752 ns = mins + lenc + aln->llen; /* show an extra line? */
1753 ns -= (itmp = ns %aln->llen); /* itmp = left over on last line */
1754 if (itmp>aln->llen/2) ns += aln->llen; /* more than 1/2 , use another*/
1755 nd = ns - (mins+lenc); /* this much extra */
1757 else nd = aln->llcntx;
1759 if (nd > max(n0-a_res.max0,n1-a_res.max1))
1760 nd = max(n0-a_res.max0,n1-a_res.max1);
1762 if (aln->showall==1) {
1763 nd = max(n0-a_res.max0,n1-a_res.max1); /* reset for showall=1 */
1765 aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,n0-a_res.max0,pst);
1766 aancpy(seqc1+mins+lenc,(char *)aa1+a_res.max1,n1-a_res.max1,pst);
1767 /* fill with blanks - this is required to use one 'nc' */
1768 memset(seqc0+mins+lenc+n0-a_res.max0,' ',nd-(n0-a_res.max0));
1769 memset(seqc1+mins+lenc+n1-a_res.max1,' ',nd-(n1-a_res.max1));
1772 if ((nd-(n0-a_res.max0))>0) {
1773 aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,n0-a_res.max0,pst);
1774 memset(seqc0+mins+lenc+n0-a_res.max0,' ',nd-(n0-a_res.max0));
1776 else aancpy(seqc0+mins+lenc,(char *)aa0+a_res.max0,nd,pst);
1778 if ((nd-(n1-a_res.max1))>0) {
1779 aancpy(seqc1+mins+lenc,(char *)aa1+a_res.max1,n1-a_res.max1,pst);
1780 memset(seqc1+mins+lenc+n1-a_res.max1,' ',nd-(n1-a_res.max1));
1782 else aancpy(seqc1+mins+lenc,(char *)aa1+a_res.max1,nd,pst);
1785 return mins+lenc+nd;
1789 update_code(char *al_str, int al_str_max, int op, int op_cnt);
1791 /* build an array of match/ins/del - length strings */
1792 int calc_code(const unsigned char *aa0, int n0,
1793 const unsigned char *aa1, int n1,
1794 struct a_struct *aln,
1795 struct a_res_str a_res,
1797 char *al_str, int al_str_n, struct f_struct *f_str)
1802 const unsigned char *aa1p;
1807 if (pst.ext_sq_set) {
1822 aln->amin0 = a_res.min0;
1823 aln->amax0 = a_res.max0;
1824 aln->amin1 = a_res.min1;
1825 aln->amax1 = a_res.max1;
1828 lenc = aln->nident = aln->nsim = aln->ngap_q = aln->ngap_l = aln->nfs = op = p_op = 0;
1835 while (i0 < a_res.max0 || i1 < a_res.max1) {
1836 if (op == 0 && *rp == 0) {
1838 if (pst.pam2[0][aa0[i0]][aa1p[i1]]>=0) { aln->nsim++;}
1840 sp0 = sq[aa0[i0++]];
1841 sp1 = sq[aa1p[i1++]];
1843 if (p_op == 0 || p_op==3) {
1844 if (sp0 != '*' && sp1 != '*') {
1846 update_code(al_str,al_str_n-strlen(al_str),p_op,op_cnt);
1847 op_cnt = 1; p_op = 0;
1852 update_code(al_str,al_str_n-strlen(al_str),p_op,op_cnt);
1853 op_cnt = 1; p_op = 3;
1857 update_code(al_str,al_str_n-strlen(al_str),p_op,op_cnt);
1858 op_cnt = 1; p_op = 0;
1864 if (toupper(sp0) == toupper(sp1)) aln->nident++;
1865 else if (pst.nt_align) {
1866 if ((toupper(sp0) == 'T' && toupper(sp1) == 'U') ||
1867 (toupper(sp0)=='U' && toupper(sp1)=='T')) aln->nident++;
1868 else if (toupper(sp0) == 'N') aln->ngap_q++;
1869 else if (toupper(sp1) == 'N') aln->ngap_l++;
1873 if (op==0) op = *rp++;
1875 if (p_op == 1) { op_cnt++;}
1877 update_code(al_str,al_str_n - strlen(al_str),p_op,op_cnt);
1878 op_cnt = 1; p_op = 1;
1880 op--; lenc++; i1++; aln->ngap_q++;
1883 if (p_op == 2) { op_cnt++;}
1885 update_code(al_str,al_str_n - strlen(al_str),p_op,op_cnt);
1886 op_cnt = 1; p_op = 2;
1888 op++; lenc++; i0++; aln->ngap_l++;
1892 update_code(al_str,al_str_n - strlen(al_str),p_op,op_cnt);
1898 update_code(char *al_str, int al_str_max, int op, int op_cnt) {
1900 char op_char[5]={"=-+*"};
1903 sprintf(tmp_cnt,"%c%d",op_char[op],op_cnt);
1904 strncat(al_str,tmp_cnt,al_str_max);
1907 int calc_id(const unsigned char *aa0, int n0,
1908 const unsigned char *aa1, int n1,
1909 struct a_struct *aln,
1910 struct a_res_str a_res,
1912 struct f_struct *f_str)
1914 int i0, i1, nn1, n_id;
1917 const unsigned char *aa1p;
1921 if (pst.ext_sq_set) {
1936 aln->amin0 = a_res.min0;
1937 aln->amax0 = a_res.max0;
1938 aln->amin1 = a_res.min1;
1939 aln->amax1 = a_res.max1;
1942 lenc = n_id = aln->nsim = aln->ngap_q = aln->ngap_l = aln->nfs = op = 0;
1946 while (i0 < a_res.max0 || i1 < a_res.max1) {
1947 if (op == 0 && *rp == 0) {
1950 if (pst.pam2[0][aa0[i0]][aa1p[i1]]>=0) { aln->nsim++;}
1952 sp0 = sq[aa0[i0++]];
1953 sp1 = sq[aa1p[i1++]];
1954 if (toupper(sp0) == toupper(sp1)) n_id++;
1955 else if (pst.nt_align &&
1956 ((sp0=='T' && sp1== 'U')||(sp0=='U' && sp1=='T'))) n_id++;
1959 if (op==0) op = *rp++;
1960 if (op>0) {op--; lenc++; i1++; aln->ngap_q++; }
1961 else {op++; lenc++; i0++; aln->ngap_l++; }
1971 update_params(struct qmng_str *qm_msg, struct pstruct *ppst)
1973 ppst->n0 = qm_msg->n0;