6 #include "io_lib_header.h"
7 #include "util_lib_header.h"
8 #include "define_header.h"
9 #include "dp_lib_header.h"
11 static void make_p_ptrs(int *tptr, int *pl, int naseq, int l);
12 static void make_n_ptrs(int *tptr, int *pl, int naseq, int len);
13 static void put_frag(int fs, int v1, int v2, int flen);
14 static int frag_rel_pos(int a1, int b1, int a2, int b2);
15 static void des_quick_sort(int *array1, int *array2, int array_size);
16 static void pair_align(int seq_no, int l1, int l2);
26 /*extern int *seqlen_array;
27 extern char **seq_array;
28 extern int dna_ktup, dna_window, dna_wind_gap, dna_signif; params for DNA
29 extern int prot_ktup,prot_window,prot_wind_gap,prot_signif; params for prots
31 extern Boolean dnaflag;
34 extern int max_aln_length;
37 static int *seqlen_array;
38 static char **seq_array;
42 static int max_aln_length;
46 static int curr_frag,maxsf;
48 static int *diag_index;
51 int ktup,window,wind_gap,signif; /* Pairwise aln. params */
53 int *zza, *zzb, *zzc, *zzd;
55 static Boolean percent=1;
58 static void make_p_ptrs(int *tptr,int *pl,int naseq,int l)
61 int i,j,limit,code,flag;
64 /*tptr--> pointer to the last occurence of the same residue or ktuple:
68 tptr: 0 0 0 0 0 1 2 5 0
75 a[i] = (int) pow((double)(max_aa+1),(double)(i-1));
77 limit = (int) pow((double)(max_aa+1),(double)ktup);
85 for(i=1;i<=(l-ktup+1);++i) {
88 for(j=1;j<=ktup;++j) {
89 residue = seq_array[naseq][i+j-1];
90 if((residue<0) || (residue > max_aa)){
94 code += ((residue) * a[j]);
99 if(pl[code]!=0)tptr[i]=pl[code];
105 static void make_n_ptrs(int *tptr,int *pl,int naseq,int len)
107 static int pot[]={ 0, 1, 4, 16, 64, 256, 1024, 4096 };
108 int i,j,limit,code,flag;
111 limit = (int) pow((double)4,(double)ktup);
113 for(i=1;i<=limit;++i)
118 for(i=1;i<=len-ktup+1;++i) {
121 for(j=1;j<=ktup;++j) {
122 residue = seq_array[naseq][i+j-1];
123 if((residue<0) || (residue>4)){
127 code += ((residue) * pot[j]); /* DES */
139 static void put_frag(int fs,int v1,int v2,int flen)
142 accum[0][curr_frag]=fs;
143 accum[1][curr_frag]=v1;
144 accum[2][curr_frag]=v2;
145 accum[3][curr_frag]=flen;
149 accum[4][curr_frag]=0;
153 if(fs >= accum[0][maxsf]) {
154 accum[4][curr_frag]=maxsf;
163 if(fs>=accum[0][next])
166 accum[4][curr_frag]=next;
167 accum[4][end]=curr_frag;
172 static int frag_rel_pos(int a1,int b1,int a2,int b2)
182 if(a2+ktup-1<a1 && b2+ktup-1<b1)
189 static void des_quick_sort(int *array1, int *array2, int array_size)
191 /* Quicksort routine, adapted from chapter 4, page 115 of software tools */
192 /* by Kernighan and Plauger, (1986) */
193 /* Sort the elements of array1 and sort the */
194 /* elements of array2 accordingly */
200 int lst[50], ust[50]; /* the maximum no. of elements must be*/
201 /* < log(base2) of 50 */
215 for(i=i+1; array1[i] < pivlin; i++)
217 for(j=j-1; j > i; j--)
218 if(array1[j] <= pivlin) break;
221 array1[i] = array1[j];
225 array2[i] = array2[j];
233 array1[i] = array1[j];
237 array2[i] = array2[j];
240 if(i-lst[p] < ust[p] - i) {
261 static void pair_align(int seq_no,int l1,int l2)
263 int pot[8],i,j,l,m,flag,limit,pos,tl1,vn1,vn2,flen,osptr,fs;
264 int tv1,tv2,encrypt,subt1,subt2,rmndr;
269 pot[i] = (int) pow((double)4,(double)(i-1));
270 limit = (int) pow((double)4,(double)ktup);
273 for (i=1;i<=ktup;i++)
274 pot[i] = (int) pow((double)(max_aa+1),(double)(i-1));
275 limit = (int) pow((double)(max_aa+1),(double)ktup);
280 for(i=1;i<=tl1;++i) {
281 slopes[i]=displ[i]=0;
286 /* increment diagonal score for each k_tuple match */
287 /* Attempt at guessing the best band by looking at identities*/
289 for(i=1;i<=limit;++i)
299 ++displ[osptr]; /*PLUG THE Pos Dependant Scheme Here!!!! (For Id only)*/
306 /* choose the top SIGNIF diagonals */
308 des_quick_sort(displ, diag_index, tl1);
310 j = tl1 - signif + 1;
313 /* flag all diagonals within WINDOW of a top diagonal */
315 for(i=tl1; i>=j; i--)
318 l = (1 >pos-window) ? 1 : pos-window;
319 m = (tl1<pos+window) ? tl1 : pos+window;
324 for(i=1; i<=tl1; i++) displ[i] = 0; /*reset the diagonals score*/
327 next=curr_frag=maxsf=0;
328 for(i=1;i<=(l1-ktup+1);++i)
332 {residue = seq_array[seq_no][i+j-1];if((residue<0) || (residue>max_aa)){flag=TRUE; break;}encrypt += ((residue)*pot[j]);}
339 /*now trying to match i-ktup and vn2-ktup*/
347 osptr=i-vn2+l2; /*osptr=Diagonal under investigation*/
348 if(slopes[osptr]!=1) /*Get the next diagonal if that one is not flagged*/
363 if(curr_frag>=2*max_aln_length)
368 displ[osptr]=curr_frag;
369 put_frag(fs,i,vn2,flen); /*sets the coordinates of the fragments*/
375 if(frag_rel_pos(i,vn2,tv1,tv2))
377 if(i-vn2==accum[1][next]-accum[2][next])
379 if(i>accum[1][next]+(ktup-1))
380 fs=accum[0][next]+ktup;
383 rmndr=i-accum[1][next];
384 fs=accum[0][next]+rmndr;
396 if(i>accum[1][displ[osptr]]+(ktup-1))
397 subt1=accum[0][displ[osptr]]+ktup;
400 rmndr=i-accum[1][displ[osptr]];
401 subt1=accum[0][displ[osptr]]+rmndr;
404 subt2=accum[0][next]-wind_gap+ktup;
438 int ** show_pair(int istart, int iend, int jstart, int jend, int *in_seqlen_array, char **in_seq_array, int dna_ktup, int dna_window, int dna_wind_gap, int dna_signif,int prot_ktup, int prot_window,int prot_wind_gap,int prot_signif, int in_nseqs,int in_dnaflag, int in_max_aa, int in_max_aln_length )
444 seqlen_array=vcalloc ( in_nseqs+1, sizeof(int));
445 for ( i=0; i< in_nseqs; i++)seqlen_array[i+1]=in_seqlen_array[i];
448 seq_array=declare_char ( in_nseqs+1, in_max_aln_length);
449 for ( i=0; i< in_nseqs; i++)sprintf (seq_array[i+1], "%s",in_seq_array[i]);
455 max_aln_length=in_max_aln_length;
458 tmat=declare_int ( nseqs+1, nseqs+1);
459 accum=declare_int( 5, 2*max_aln_length+1);
461 displ = (int *) vcalloc( (2*max_aln_length +1), sizeof (int) );
462 slopes = (char *)vcalloc( (2*max_aln_length +1) , sizeof (char));
463 diag_index = (int *) vcalloc( (2*max_aln_length +1) , sizeof (int) );
465 zza = (int *)vcalloc( (max_aln_length+1),sizeof (int) );
466 zzb = (int *)vcalloc( (max_aln_length+1),sizeof (int) );
468 zzc = (int *)vcalloc( (max_aln_length+1), sizeof (int) );
469 zzd = (int *)vcalloc( (max_aln_length+1), sizeof (int) );
475 wind_gap = dna_wind_gap;
479 window = prot_window;
480 signif = prot_signif;
481 wind_gap = prot_wind_gap;
484 for(i=istart+1;i<=iend;++i)
487 make_n_ptrs(zza,zzc,i,seqlen_array[i]);
489 make_p_ptrs(zza,zzc,i,seqlen_array[i]);
490 for(j=MAX(jstart+1, i+1);j<=jend;++j)
495 make_n_ptrs(zzb,zzd,j,seqlen_array[j]);
497 make_p_ptrs(zzb,zzd,j,seqlen_array[j]);
499 pair_align(i,seqlen_array[i],seqlen_array[j]);
504 calc_score=(double)accum[0][maxsf];
506 dsr=(seqlen_array[i]<seqlen_array[j]) ?
507 seqlen_array[i] : seqlen_array[j];
508 calc_score = (calc_score/(double)dsr) * 100.0;
512 tmat[i-1][j-1] = (int)(((100.0 - calc_score)/100.0)*1000);
513 tmat[j-1][i-1] = (int)(((100.0 - calc_score)/100.0)*1000);
514 fprintf ( stderr, "\r[%d %d]=> %.2f",i, j, (float)calc_score );
519 free_int ( accum, -1);
532 /*********************************COPYRIGHT NOTICE**********************************/
533 /*© Centro de Regulacio Genomica */
535 /*Cedric Notredame */
536 /*Tue Oct 27 10:12:26 WEST 2009. */
537 /*All rights reserved.*/
538 /*This file is part of T-COFFEE.*/
540 /* T-COFFEE is free software; you can redistribute it and/or modify*/
541 /* it under the terms of the GNU General Public License as published by*/
542 /* the Free Software Foundation; either version 2 of the License, or*/
543 /* (at your option) any later version.*/
545 /* T-COFFEE is distributed in the hope that it will be useful,*/
546 /* but WITHOUT ANY WARRANTY; without even the implied warranty of*/
547 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/
548 /* GNU General Public License for more details.*/
550 /* You should have received a copy of the GNU General Public License*/
551 /* along with Foobar; if not, write to the Free Software*/
552 /* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/
553 /*............................................... |*/
554 /* If you need some more information*/
555 /* cedric.notredame@europe.com*/
556 /*............................................... |*/
560 /*********************************COPYRIGHT NOTICE**********************************/