Next version of JABA
[jabaws.git] / binaries / src / tcoffee / t_coffee_source / showpair.c
1 #include <stdio.h>
2 #include <string.h>
3 #include <stdlib.h>
4 #include <math.h>
5
6 #include "io_lib_header.h"
7 #include "util_lib_header.h"
8 #include "define_header.h"
9 #include "dp_lib_header.h"
10
11 static void make_p_ptrs(int *tptr, int *pl, int naseq, int l);
12 static void make_n_ptrs(int *tptr, int *pl, int naseq, int len);
13 static void put_frag(int fs, int v1, int v2, int flen);
14 static int frag_rel_pos(int a1, int b1, int a2, int b2);
15 static void des_quick_sort(int *array1, int *array2, int array_size);
16 static void pair_align(int seq_no, int l1, int l2);
17
18
19 /*
20 *       Prototypes
21 */
22
23 /*
24 *        Global variables
25 */
26 /*extern int *seqlen_array;
27   extern char **seq_array;
28   extern int  dna_ktup, dna_window, dna_wind_gap, dna_signif; params for DNA 
29  extern int prot_ktup,prot_window,prot_wind_gap,prot_signif; params for prots 
30   extern int    nseqs;
31   extern Boolean        dnaflag;
32   extern double         **tmat;
33   extern int    max_aa;
34   extern int  max_aln_length;
35 */
36
37 static int *seqlen_array;
38 static char **seq_array;
39  
40 static int      nseqs;
41 static int      dnaflag;
42 static int      max_aln_length;
43 static int      max_aa;
44
45 static int      next;
46 static int      curr_frag,maxsf;
47 static int      **accum;
48 static int      *diag_index;
49 static char     *slopes;
50
51 int ktup,window,wind_gap,signif;                      /* Pairwise aln. params */
52 int *displ;
53 int *zza, *zzb, *zzc, *zzd;
54
55 static Boolean percent=1;
56
57
58 static void make_p_ptrs(int *tptr,int *pl,int naseq,int l)
59 {
60         static int a[10];
61         int i,j,limit,code,flag;
62         int residue;
63         
64         /*tptr--> pointer to the last occurence of the same residue or ktuple:
65
66           abcdeabef
67           
68           tptr: 0 0 0 0 0 1 2 5 0
69           pl[a]=6
70           pl[b]=7
71         */
72
73         
74         for (i=1;i<=ktup;i++)
75            a[i] = (int) pow((double)(max_aa+1),(double)(i-1));
76
77         limit = (int)   pow((double)(max_aa+1),(double)ktup);
78         for(i=1;i<=limit;++i)
79                 pl[i]=0;
80         for(i=1;i<=l;++i)
81                 tptr[i]=0;
82         
83         
84
85         for(i=1;i<=(l-ktup+1);++i) {
86                 code=0;
87                 flag=FALSE;
88                 for(j=1;j<=ktup;++j) {
89                         residue = seq_array[naseq][i+j-1];
90                         if((residue<0) || (residue > max_aa)){
91                                 flag=TRUE;
92                                 break;
93                         }
94                         code += ((residue) * a[j]);
95                 }
96                 if(flag)
97                         continue;
98                 ++code;
99                 if(pl[code]!=0)tptr[i]=pl[code];
100                 pl[code]=i;
101         }
102 }
103
104
105 static void make_n_ptrs(int *tptr,int *pl,int naseq,int len)
106 {
107         static int pot[]={ 0, 1, 4, 16, 64, 256, 1024, 4096 };
108         int i,j,limit,code,flag;
109         int residue;
110         
111         limit = (int) pow((double)4,(double)ktup);
112         
113         for(i=1;i<=limit;++i)
114                 pl[i]=0;
115         for(i=1;i<=len;++i)
116                 tptr[i]=0;
117         
118         for(i=1;i<=len-ktup+1;++i) {
119                 code=0;
120                 flag=FALSE;
121                 for(j=1;j<=ktup;++j) {
122                         residue = seq_array[naseq][i+j-1];
123                         if((residue<0) || (residue>4)){
124                                 flag=TRUE;
125                                 break;
126                         }
127                         code += ((residue) * pot[j]);  /* DES */
128                 }
129                 if(flag)
130                         continue;
131                 ++code;
132                 if(pl[code]!=0)
133                         tptr[i]=pl[code];
134                 pl[code]=i;
135         }
136 }
137
138
139 static void put_frag(int fs,int v1,int v2,int flen)
140 {
141         int end;
142         accum[0][curr_frag]=fs;
143         accum[1][curr_frag]=v1;
144         accum[2][curr_frag]=v2;
145         accum[3][curr_frag]=flen;
146         
147         if(!maxsf) {
148                 maxsf=1;
149                 accum[4][curr_frag]=0;
150                 return;
151         }
152         
153         if(fs >= accum[0][maxsf]) {
154                 accum[4][curr_frag]=maxsf;
155                 maxsf=curr_frag;
156                 return;
157         }
158         else {
159                 next=maxsf;
160                 while(TRUE) {
161                         end=next;
162                         next=accum[4][next];
163                         if(fs>=accum[0][next])
164                                 break;
165                 }
166                 accum[4][curr_frag]=next;
167                 accum[4][end]=curr_frag;
168         }
169 }
170
171
172 static int frag_rel_pos(int a1,int b1,int a2,int b2)
173 {
174         int ret;
175         
176         ret=FALSE;
177         if(a1-b1==a2-b2) {
178                 if(a2<a1)
179                         ret=TRUE;
180         }
181         else {
182                 if(a2+ktup-1<a1 && b2+ktup-1<b1)
183                         ret=TRUE;
184         }
185         return ret;
186 }
187
188
189 static void des_quick_sort(int *array1, int *array2, int array_size)
190 /*  */
191 /* Quicksort routine, adapted from chapter 4, page 115 of software tools */
192 /* by Kernighan and Plauger, (1986) */
193 /* Sort the elements of array1 and sort the */
194 /* elements of array2 accordingly */
195 /*  */
196 {
197         int temp1, temp2;
198         int p, pivlin;
199         int i, j;
200         int lst[50], ust[50];       /* the maximum no. of elements must be*/
201                                                                 /* < log(base2) of 50 */
202
203         lst[1] = 1;
204         ust[1] = array_size;
205         p = 1;
206
207         while(p > 0) {
208                 if(lst[p] >= ust[p])
209                         p--;
210                 else {
211                         i = lst[p] - 1;
212                         j = ust[p];
213                         pivlin = array1[j];
214                         while(i < j) {
215                                 for(i=i+1; array1[i] < pivlin; i++)
216                                         ;
217                                 for(j=j-1; j > i; j--)
218                                         if(array1[j] <= pivlin) break;
219                                 if(i < j) {
220                                         temp1     = array1[i];
221                                         array1[i] = array1[j];
222                                         array1[j] = temp1;
223                                         
224                                         temp2     = array2[i];
225                                         array2[i] = array2[j];
226                                         array2[j] = temp2;
227                                 }
228                         }
229                         
230                         j = ust[p];
231
232                         temp1     = array1[i];
233                         array1[i] = array1[j];
234                         array1[j] = temp1;
235
236                         temp2     = array2[i];
237                         array2[i] = array2[j];
238                         array2[j] = temp2;
239
240                         if(i-lst[p] < ust[p] - i) {
241                                 lst[p+1] = lst[p];
242                                 ust[p+1] = i - 1;
243                                 lst[p]   = i + 1;
244                         }
245                         else {
246                                 lst[p+1] = i + 1;
247                                 ust[p+1] = ust[p];
248                                 ust[p]   = i - 1;
249                         }
250                         p = p + 1;
251                 }
252         }
253         return;
254
255 }
256
257
258
259
260
261 static void pair_align(int seq_no,int l1,int l2)
262 {
263         int pot[8],i,j,l,m,flag,limit,pos,tl1,vn1,vn2,flen,osptr,fs;
264         int tv1,tv2,encrypt,subt1,subt2,rmndr;
265         int residue;
266         
267         if(dnaflag) {
268                 for(i=1;i<=ktup;++i)
269                         pot[i] = (int) pow((double)4,(double)(i-1));
270                 limit = (int) pow((double)4,(double)ktup);
271         }
272         else {
273                 for (i=1;i<=ktup;i++)
274                         pot[i] = (int) pow((double)(max_aa+1),(double)(i-1));
275                 limit = (int) pow((double)(max_aa+1),(double)ktup);
276         }
277         
278         tl1 = (l1+l2)-1;
279         
280         for(i=1;i<=tl1;++i) {
281                 slopes[i]=displ[i]=0;
282                 diag_index[i] = i;
283         }
284         
285
286 /* increment diagonal score for each k_tuple match */
287 /* Attempt at guessing the best band by looking at identities*/
288
289         for(i=1;i<=limit;++i) 
290                 {
291                 vn1=zzc[i];
292                 while(TRUE) 
293                         {
294                         if(!vn1) break;
295                         vn2=zzd[i];
296                         while(vn2 != 0) 
297                                 {
298                                 osptr=vn1-vn2+l2;
299                                 ++displ[osptr]; /*PLUG THE Pos Dependant Scheme Here!!!! (For Id only)*/
300                                 vn2=zzb[vn2];
301                                 }
302                         vn1=zza[vn1];
303                         }
304                 }
305
306 /* choose the top SIGNIF diagonals */
307
308         des_quick_sort(displ, diag_index, tl1);
309
310         j = tl1 - signif + 1;
311         if(j < 1) j = 1;
312  
313 /* flag all diagonals within WINDOW of a top diagonal */
314
315         for(i=tl1; i>=j; i--) 
316                 if(displ[i] > 0) {
317                         pos = diag_index[i];
318                         l = (1  >pos-window) ? 1   : pos-window;
319                         m = (tl1<pos+window) ? tl1 : pos+window;
320                         for(; l <= m; l++) 
321                                 slopes[l] = 1;
322                 }
323
324         for(i=1; i<=tl1; i++)  displ[i] = 0; /*reset the diagonals score*/
325
326         
327         next=curr_frag=maxsf=0; 
328         for(i=1;i<=(l1-ktup+1);++i) 
329                 {
330                 encrypt=flag=0;
331                 for(j=1;j<=ktup;++j)
332                 {residue = seq_array[seq_no][i+j-1];if((residue<0) || (residue>max_aa)){flag=TRUE; break;}encrypt += ((residue)*pot[j]);}
333                 if(flag) continue;
334                 else flag=FALSE;
335                 
336                 ++encrypt;      
337                 vn2=zzd[encrypt];
338       
339                 /*now trying to match i-ktup and vn2-ktup*/
340                 while(TRUE) 
341                         {
342                         if(!vn2) 
343                                 {
344                                 flag=TRUE;
345                                 break;
346                                 }
347                         osptr=i-vn2+l2;      /*osptr=Diagonal under investigation*/
348                         if(slopes[osptr]!=1) /*Get the next diagonal if that one is not flagged*/
349                                 {
350                                 vn2=zzb[vn2];
351                                 continue;
352                                 }
353                         flen=0;
354                         fs=ktup;
355                         next=maxsf;     
356                         
357                         /* A-loop*/
358                         while(TRUE) 
359                             {
360                             if(!next)
361                                   {
362                                   ++curr_frag;
363                                   if(curr_frag>=2*max_aln_length) 
364                                           {
365
366                                           return;
367                                           }
368                                   displ[osptr]=curr_frag;
369                                   put_frag(fs,i,vn2,flen); /*sets the coordinates of the fragments*/
370                                   }
371                             else 
372                                   {
373                                   tv1=accum[1][next];
374                                   tv2=accum[2][next];
375                                   if(frag_rel_pos(i,vn2,tv1,tv2)) 
376                                         {
377                                         if(i-vn2==accum[1][next]-accum[2][next]) 
378                                             {
379                                             if(i>accum[1][next]+(ktup-1))
380                                                 fs=accum[0][next]+ktup;
381                                             else  
382                                                   {
383                                                   rmndr=i-accum[1][next];
384                                                   fs=accum[0][next]+rmndr;
385                                                   }
386                                             flen=next;
387                                             next=0;
388                                             continue;
389                                             }
390                                         else 
391                                             {
392                                             if(displ[osptr]==0)
393                                             subt1=ktup;
394                                             else 
395                                                {
396                                                if(i>accum[1][displ[osptr]]+(ktup-1))
397                                                subt1=accum[0][displ[osptr]]+ktup;
398                                                else 
399                                                   {
400                                                   rmndr=i-accum[1][displ[osptr]];
401                                                   subt1=accum[0][displ[osptr]]+rmndr;
402                                                   }
403                                                }
404                                             subt2=accum[0][next]-wind_gap+ktup;
405                                             if(subt2>subt1) 
406                                                 {
407                                                 flen=next;
408                                                 fs=subt2;
409                                                 }
410                                             else 
411                                                 {
412                                                 flen=displ[osptr];
413                                                 fs=subt1;
414                                                 }
415                                             next=0;
416                                             continue;
417                                             }
418                                            
419                                         }
420                                   else 
421                                         {
422                                         next=accum[4][next];
423                                         continue;
424                                         }
425                                   }
426                             break;
427                         }
428                 /*
429                 * End of Aloop
430                 */
431                 
432                         vn2=zzb[vn2];
433                 }
434         }
435
436 }                
437
438 int ** show_pair(int istart, int iend, int jstart, int jend, int *in_seqlen_array, char **in_seq_array, int dna_ktup, int dna_window, int dna_wind_gap, int dna_signif,int prot_ktup, int prot_window,int prot_wind_gap,int prot_signif, int in_nseqs,int in_dnaflag, int in_max_aa, int in_max_aln_length  )
439 {
440         int i,j,dsr;
441         double calc_score;
442         int **tmat;
443         
444         seqlen_array=vcalloc ( in_nseqs+1, sizeof(int));
445         for ( i=0; i< in_nseqs; i++)seqlen_array[i+1]=in_seqlen_array[i];
446         
447         
448         seq_array=declare_char ( in_nseqs+1, in_max_aln_length);
449         for ( i=0; i< in_nseqs; i++)sprintf (seq_array[i+1], "%s",in_seq_array[i]); 
450
451         
452         nseqs=in_nseqs;
453         dnaflag=in_dnaflag;
454         max_aa=in_max_aa;
455         max_aln_length=in_max_aln_length;
456
457         
458         tmat=declare_int ( nseqs+1, nseqs+1);
459         accum=declare_int( 5, 2*max_aln_length+1);
460
461         displ      = (int *) vcalloc( (2*max_aln_length +1), sizeof (int) );
462         slopes     = (char *)vcalloc( (2*max_aln_length +1) , sizeof (char));
463         diag_index = (int *) vcalloc( (2*max_aln_length +1) , sizeof (int) );
464
465         zza = (int *)vcalloc( (max_aln_length+1),sizeof (int) );
466         zzb = (int *)vcalloc( (max_aln_length+1),sizeof (int) );
467
468         zzc = (int *)vcalloc( (max_aln_length+1), sizeof (int) );
469         zzd = (int *)vcalloc( (max_aln_length+1), sizeof (int) );
470
471         if(dnaflag) {
472                 ktup     = dna_ktup;
473                 window   = dna_window;
474                 signif   = dna_signif;
475                 wind_gap = dna_wind_gap;
476         }
477         else {
478                 ktup     = prot_ktup;
479                 window   = prot_window;
480                 signif   = prot_signif;
481                 wind_gap = prot_wind_gap;
482         }
483
484         for(i=istart+1;i<=iend;++i) 
485                 {
486                 if(dnaflag)
487                         make_n_ptrs(zza,zzc,i,seqlen_array[i]);
488                 else
489                         make_p_ptrs(zza,zzc,i,seqlen_array[i]);
490                 for(j=MAX(jstart+1, i+1);j<=jend;++j) 
491                         {
492                             if (i!=j)
493                                {
494                                    if(dnaflag)
495                                        make_n_ptrs(zzb,zzd,j,seqlen_array[j]);
496                                    else
497                                        make_p_ptrs(zzb,zzd,j,seqlen_array[j]);
498
499                                    pair_align(i,seqlen_array[i],seqlen_array[j]);
500
501                                    if(!maxsf)
502                                        calc_score=0.0;
503                                    else {
504                                        calc_score=(double)accum[0][maxsf];
505                                        if(percent) {
506                                            dsr=(seqlen_array[i]<seqlen_array[j]) ?
507                                                seqlen_array[i] : seqlen_array[j];
508                                            calc_score = (calc_score/(double)dsr) * 100.0;
509                                        }
510                                    }
511
512                                    tmat[i-1][j-1] = (int)(((100.0 - calc_score)/100.0)*1000);
513                                    tmat[j-1][i-1] = (int)(((100.0 - calc_score)/100.0)*1000);
514                                    fprintf ( stderr, "\r[%d %d]=> %.2f",i, j, (float)calc_score );
515                                }
516                         }
517                 }
518
519         free_int ( accum, -1);
520
521         vfree(displ);
522         vfree(slopes);
523         vfree(diag_index);
524
525         vfree(zza);
526         vfree(zzb);
527         vfree(zzc);
528         vfree(zzd);
529         return tmat;
530 }
531
532 /*********************************COPYRIGHT NOTICE**********************************/
533 /*© Centro de Regulacio Genomica */
534 /*and */
535 /*Cedric Notredame */
536 /*Tue Oct 27 10:12:26 WEST 2009. */
537 /*All rights reserved.*/
538 /*This file is part of T-COFFEE.*/
539 /**/
540 /*    T-COFFEE is free software; you can redistribute it and/or modify*/
541 /*    it under the terms of the GNU General Public License as published by*/
542 /*    the Free Software Foundation; either version 2 of the License, or*/
543 /*    (at your option) any later version.*/
544 /**/
545 /*    T-COFFEE is distributed in the hope that it will be useful,*/
546 /*    but WITHOUT ANY WARRANTY; without even the implied warranty of*/
547 /*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the*/
548 /*    GNU General Public License for more details.*/
549 /**/
550 /*    You should have received a copy of the GNU General Public License*/
551 /*    along with Foobar; if not, write to the Free Software*/
552 /*    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA*/
553 /*...............................................                                                                                      |*/
554 /*  If you need some more information*/
555 /*  cedric.notredame@europe.com*/
556 /*...............................................                                                                                                                                     |*/
557 /**/
558 /**/
559 /*      */
560 /*********************************COPYRIGHT NOTICE**********************************/