Next version of JABA
[jabaws.git] / binaries / src / tcoffee / t_coffee_source / util_domain_dp.c
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <math.h>
4 #include <stdarg.h>
5 #include <string.h>
6
7 #include "io_lib_header.h"
8 #include "util_lib_header.h"
9 #include "define_header.h"
10
11 #include "dp_lib_header.h"
12
13 int domain_pair_wise (Alignment *A,int*in_ns, int **in_l_s,Constraint_list *CL )
14         {
15 /*******************************************************************************/
16 /*                SEQ_DOMAIN DP                                                    */
17 /*                                                                             */
18 /*      makes DP between the the ns[0] sequences and the ns[1]                 */
19 /*                                                                             */
20 /*      for MODE, see the function  get_dp_cost                                */
21 /*******************************************************************************/
22         
23         int scale, gop, gep, maximise; 
24         int a, b, i, j,l,x;
25         int best_j;
26
27         int lenal[2], len;
28         int sub;
29         int match;
30         
31         int *ns, **l_s;
32
33
34         int *f;
35         int *pf;
36
37         int *dd;
38         int *dd_len;
39         
40         int * e;
41         int *pe;
42         int * e_len;
43         int *pe_len;
44             
45         int fop;
46         int **pos0;
47         
48         int  **al=NULL;
49         int  **pos_al=NULL;
50
51
52         
53         int ala,LEN;
54         char *buffer;
55         char *char_buf;
56
57
58 /*trace back variables       */
59         TRACE_TYPE *buf_trace=NULL;
60         TRACE_TYPE **trace;
61         TRACE_TYPE k;
62         TRACE_TYPE *tr;
63         int **result_aln;
64         int nseq;
65 /*Test Varaibles*/
66         int score;
67
68 /*Prepare l_s and ns*/
69
70         
71         ns=vcalloc( 2, sizeof (int));
72         l_s=vcalloc ( 2, sizeof (int*));
73         
74         ns[0]=in_ns[1];
75         ns[1]=in_ns[0];
76           
77         l_s[0]=in_l_s[1];
78         l_s[1]=in_l_s[0];
79         lenal[0]=strlen (A->seq_al[l_s[0][0]]);
80         lenal[1]=strlen (A->seq_al[l_s[1][0]]);
81         len=lenal[0]+lenal[1]+2;
82 /********************************/      
83 gop=(CL->gop)*SCORE_K;
84 gep=(CL->gep)*SCORE_K;
85 maximise=CL->maximise;
86 scale=(lenal[1]*SCORE_K*(CL->moca)->moca_scale);
87 /*******************************/
88
89         
90
91 /*DO MEMORY ALLOCATION FOR DP*/
92         
93
94         
95
96
97         buf_trace=vcalloc ( len, sizeof (TRACE_TYPE));  
98         buffer=vcalloc ( 2*len, sizeof (char)); 
99         
100         al    =declare_int  (2, 2*len);  
101         pos_al=declare_int  (2, 2*len);
102         result_aln=declare_int (1,len);
103         char_buf= vcalloc (2*len, sizeof (char));       
104
105         f     =vcalloc (len, sizeof (int));
106        pf     =vcalloc (len, sizeof (int));
107         e     =vcalloc (len, sizeof (int));
108        pe     =vcalloc (len, sizeof (int));
109         e_len =vcalloc (len, sizeof (int));
110        pe_len =vcalloc (len, sizeof (int));
111         dd    =vcalloc (len, sizeof (int));
112         dd_len=vcalloc (len, sizeof (int));
113
114         
115         trace=declare_int (lenal[0]+2, lenal[1]+2);
116
117 /*END OF MEMORY ALLOCATION*/
118         
119         
120                 /*
121                 0(s)   +(dd)
122                   \      |
123                    \     |
124                     \    |
125                      \   |
126                       \  |
127                        \ |
128                         \|
129                 -(e)----O
130                 */ 
131         
132         pos0=aln2pos_simple ( A,-1, ns, l_s);   
133
134         for ( i=0; i<=lenal[0]+1; i++)
135             {       
136             tr=trace[i];
137
138             
139             for ( sub=0,j=0; j<=lenal[1]; j++)
140                 {
141                 if      (i==0 && j==0){tr[j]=1;pe[j]=dd[j]=gop;}
142                 else if (i==0)        {e[j]=pe[j]=dd[j]=gop;dd_len[j]=e_len[j]=pe_len[j]=f[j]=pf[j]=0;tr[j]=-1;}
143                 else if (j==0)
144                      {
145                      for (f[j]=pf[0],best_j=0,a=1; a<=lenal[1]; a++)
146                          {
147                          if (f[j]!=MAX(pf[a]+scale,f[j]))
148                              {
149                              f[j]=pf[a]+scale;
150                              best_j=a;
151                              }                  
152                          }
153                      
154
155                      dd    [j]=e[j]=pe[j]=gop;
156                      dd_len[j]=e_len[j]=0;
157                      tr    [j]=best_j;
158                      
159                      }
160                 else if (i>lenal[0]);
161                 else
162                      {
163                      sub=(CL->get_dp_cost) (A, pos0, ns[0], l_s[0], i-1, pos0, ns[1], l_s[1],j-1,CL);
164                      
165                   
166                      
167                      match=pf[j-1]+sub;
168                      if (a_better_than_b(pf[j]+gop, pe[j]+gep,maximise))
169                          {
170                          e    [j]=pf[j]+gop;
171                          e_len[j]=1;
172                          }
173                      else 
174                          {
175                          e    [j]=pe    [j]+gep;
176                          e_len[j]=pe_len[j]+1;
177                          }
178                      
179                      
180                      if (a_better_than_b(f[j-1]+gop, dd[j-1]+gep,maximise))
181                         {
182                         dd    [j]=f[j-1]+gop;
183                         dd_len[j]=1;
184                         }
185                      else 
186                         {
187                         dd    [j]=dd    [j-1]+gep;
188                         dd_len[j]=dd_len[j-1]+1  ;
189                         }
190                      
191
192
193                      if ( sub!=UNDEFINED)
194                          {                       
195                          
196                          f[j] =best_int(4,maximise,&fop,e[j],match,dd[j],f[0]);
197                          fop-=1;
198                          
199                          if      (fop==-1)fop= e_len[j]*fop;
200                          else if (fop==1 )fop=dd_len[j]*fop;
201                          else if (fop==2 )fop=UNDEFINED;
202                          
203                          
204                          
205                          }
206                      else
207                          {
208                          dd[j]=e[j]=match=-10000;                        
209                          f[j]=f[0];
210                          fop=UNDEFINED;
211                          }
212                      pe    [j]  =e    [j];
213                      pf    [j-1]=f    [j-1];
214                      pe_len[j]  =e_len[j];
215                      tr[j]      =fop;   
216                      }
217         
218                 }
219             
220             pf    [j-1]=f    [j-1];
221             pe    [j]  =e    [j];
222             pe_len[j]  =e_len[j];
223             }
224         score=f[0];
225         i=lenal[0]+1;
226         j=0;
227         ala=0;
228         
229         
230         while (i!=0)
231               {
232              
233
234               k=trace[i][j];
235               if (j==0 && i<=lenal[0])
236                  {
237                  
238                  pos_al[0][ala]=i;
239                  pos_al[1][ala]=j;
240                  al[0][ala]=MATCH;
241                  al[1][ala]=UNALIGNED;
242                  i--;
243                  j=k;
244                  ala++;
245                  }
246               else if ( j==0 && i>lenal[0])
247                  {
248                  j=k;
249                  i--; 
250                  
251                  }
252               else if (k==0)
253                  {              
254                  pos_al[0][ala]=i;
255                  pos_al[1][ala]=j;
256                  al[0][ala]=MATCH;
257                  al[1][ala]=MATCH;
258                  i--;
259                  j--;
260                 
261                  ala++;
262                  }              
263               else if (k!=UNDEFINED && k<0 && i>0)
264                  {
265                  for (x=0; x< -k && i>0; x++)
266                      {
267                      pos_al[0][ala]=i;
268                      pos_al[1][ala]=j;    
269                      al[0][ala]=MATCH;
270                      al[1][ala]=GAP;             
271                      i--;
272                      ala++;
273                      }
274                  }
275               else if (k!=UNDEFINED && k>0 && j>0)
276                  {
277                  for ( x=0; x< k && j>0; x++)
278                      {
279                      pos_al[0][ala]=i;
280                      pos_al[1][ala]=j;    
281                      al[0][ala]=GAP;
282                      al[1][ala]=MATCH;
283                      j--;
284                      ala++;
285                      }
286                  }
287               else if ( k==UNDEFINED){j=0;}
288               
289               }
290
291         
292         LEN=ala;        
293
294         invert_list_int ( pos_al[0], LEN);
295         invert_list_int ( pos_al[1], LEN);              
296         invert_list_int (     al[0], LEN);
297         invert_list_int (     al[1], LEN);      
298
299         
300         /*O: TARGET SEQUENCE  (long)*/
301         /*1: PATTERN SEQUENCE (short)*/
302
303         
304         
305         for ( b=0; b<lenal[1]; b++)result_aln[0][b]=-1;
306         for (l=0, nseq=0, a=0; a< LEN; a++)
307             {
308             i=pos_al[0][a];
309             j=pos_al[1][a];
310             
311             if (al[1][a]==UNALIGNED && l>0)
312                      {
313                      result_aln=realloc_int ( result_aln, read_size_int ( result_aln,sizeof (int*)),len, 1, 0);            
314                      nseq++;
315                      l=0;
316                      for ( b=0; b<lenal[1]; b++)result_aln[nseq][b]=-1;
317                      }
318             
319             else if (al[1][a]==MATCH && al[0][a]==MATCH ){l++;result_aln[nseq][j-1]=i-1;}
320             else if (al[1][a]==MATCH && al[0][a]==GAP   ){l++;result_aln[nseq][j-1]=-1;}
321             
322             }
323         if ( l>0)nseq++;
324
325
326
327
328         A=domain_match_list2aln ( A,ns,l_s,result_aln,nseq,lenal[1]); 
329
330         
331         vfree (f);
332         vfree (pf);
333         vfree (e);
334         vfree (pe);
335         vfree (e_len);
336         vfree (pe_len);
337         vfree (dd_len);
338         vfree (dd);
339         free_int (pos0, -1);
340         vfree (buffer);
341         vfree (char_buf); 
342         vfree (buf_trace);
343         free_int ( pos_al, -1);
344         pos_al=NULL;
345           
346         free_int (     al, -1);
347
348         free_int (trace,-1);
349         free_int ( result_aln, -1);
350         return score;
351         }
352
353
354 Alignment *domain_match_list2aln ( Alignment *A,int *ns,int **l_s,int **ml, int nseq, int len)
355            {
356
357              /*
358                function documentation: start
359                
360                This function edits the alignment given the results obtained by DP
361                ns: ns[0]->number of sequences serarched (TARGET)
362                    ns[1]->number of sequences in the pattern (PATTERN SEQ)
363                l_s:   
364                    l_s[0]->list of sequences in the TARGET...
365                
366                nseq: number of occurences of PATTERN in TARGET
367                len:  length of the PATTERN
368                
369                ml: detail of the nseq matches
370                    ml[x][y]=k-> residue k of TARGET matches residue y of pattern
371                              -> k=-1 means a gap;
372                              
373                NOTE: This implementation can only match ONE target sequence with the PATTERN
374                The Pattern can either be one sequence or a profile.
375
376                function documentation: end
377              */
378
379              
380              
381
382            int a, b, c, d, e;
383            Alignment *B=NULL;
384            int **new_ml;
385            int  *max_ml;
386            int  *start_ml;
387            int tot_nseq;
388            int max_len,seq;
389            char *buf;
390            
391            if ( len==0 || nseq==0)
392               {
393               A->nseq=0;
394               A->len_aln=0;
395               }
396            else
397               {
398               B=copy_aln(A, B);
399               /*1 Extract the sequence used as a pattern, put it on the top*/
400               
401             
402               A=shrink_aln (A, ns[1], l_s[1]);
403               A=realloc_aln2(A, ns[1]+ns[0]*nseq,len+A->len_aln+1);
404               
405               
406               
407               new_ml  =declare_int ( nseq, 3*len);
408               max_ml  =vcalloc ( nseq, sizeof (int));
409               for ( a=0; a<nseq; a++)max_ml[a]=-1;
410
411               start_ml=vcalloc ( nseq, sizeof (int));
412
413               for ( b=0,a=0; a< len; a++, b+=3)
414                   {
415                   for ( max_len=0,c=0; c< nseq; c++)
416                       {
417                       if ( max_ml[c]<0 && ml[c][a]<0)
418                          {
419                          new_ml[c][b]=ml[c][a];
420                          new_ml[c][b+1]=0;
421                          }
422                       else if ( ml[c][a]>=0)
423                          {                     
424                          new_ml[c][b]=ml[c][a];
425                          if ( max_ml[c]<0) start_ml[c]=max_ml[c]=ml[c][a];
426                          for ( d=a+1; d<len; d++){if ( ml[c][d]>=0){ max_ml[c]= ml[c][d];break;}}
427                          if (max_ml[c]!=new_ml[c][b])
428                             {
429                             new_ml[c][b+1]=max_ml[c]-new_ml[c][b]-1;
430                             }
431                          max_len=MAX( max_len, new_ml[c][b+1]);
432                          }
433                       else
434                          {
435                          new_ml[c][b]=ml[c][a];
436                          new_ml[c][b+1]=0;
437                          }
438                       }
439                
440                   for ( c=0; c< nseq; c++){new_ml[c][b+2]=max_len;}
441                   }
442           
443               tot_nseq=ns[1]+ns[0]*nseq;
444            
445               for ( a=0, b=0; a< len ;a++)
446                   {
447                  
448                   /*1: Place the Match Column*/
449                   for ( c=0; c< ns[1]; c++)
450                       {
451                       A->seq_al[c][b]=B->seq_al[l_s[1][c]][a];
452                       A->seq_al[c][b+1]='\0';              
453                       }
454                   for ( e=0,c=ns[1]; c<tot_nseq; c+=ns[0],e++)
455                       for ( d=0; d< ns[0]; d++)
456                           {
457                           if ( new_ml[e][3*a]!=-1)
458                             A->seq_al[c+d][b]=B->seq_al[l_s[0][d]][new_ml[e][3*a]];
459                           else 
460                               A->seq_al[c+d][b]='-';
461                           A->seq_al[c+d][b+1]='\0';
462                        
463                           }
464                   b++;
465
466                  /*2: Add the Gaps before the next_column*/
467                   if ( new_ml[0][3*a+2]>0)
468                      {
469                      for ( c=0; c< ns[1]; c++)
470                          {
471                          buf=generate_null(new_ml[0][3*a+2]);
472                          strcat ( A->seq_al[c],buf);
473                          vfree (buf);
474                          }
475                      for (e=0,c=ns[1];c< tot_nseq; c+=ns[0], e++)
476                          {
477                            buf=extract_char (B->seq_al[l_s[0][0]], new_ml[e][3*a]+1, new_ml[e][3*a+1]);
478                            strcat ( A->seq_al[c],buf);
479                            vfree (buf);
480                            buf=generate_null(new_ml[e][3*a+2]-new_ml[e][3*a+1]);
481                            strcat ( A->seq_al[c],buf);
482                            
483                            vfree (buf);
484                            
485                          }
486                      }
487                   b+=new_ml[0][3*a+2];
488                   }
489               
490               for (e=0,a=ns[1]; a< tot_nseq; a+=ns[0],e++)
491                   {
492                   for ( b=0; b<ns[0]; b++)
493                       {
494                       seq=l_s[0][b];
495                       A->order[a+b][0]=B->order[seq][0];
496                       A->order[a+b][1]=B->order[seq][1];
497                       for ( c=0; c<start_ml[e];c++)A->order[a+b][1]+=!is_gap(B->seq_al[seq][c]);
498                       sprintf ( A->name[a+b], "Repeat_%d", a+b);
499                       }
500                   }
501         
502               free_aln(B);
503               A->nseq=tot_nseq;
504               A->len_aln=strlen ( A->seq_al[0]);
505               
506               }
507            return A;
508            }
509 Alignment * domain_seq2domain (Constraint_list *CL,int scale,int gop,int gep,Alignment *SEQ_DOMAIN, Alignment *TARGET) 
510            {    
511            static Alignment *A;
512            int *n_groups;
513            int **group_list;
514            int a,b,c;
515            
516            A=copy_aln (TARGET, A);
517            A=stack_aln( A, SEQ_DOMAIN);
518
519            
520            n_groups=vcalloc ( 2, sizeof (int));
521            group_list=declare_int (2, A->nseq);
522            
523            n_groups[0]=TARGET->nseq;
524            n_groups[1]=SEQ_DOMAIN->nseq;
525            for (c=0, a=0; a< 2; a++)
526                {
527                for (b=0; b< n_groups[a]; b++, c++)
528                    {
529                    group_list[a][b]=c;
530                    }
531                }           
532            A->score_aln=domain_pair_wise (A, n_groups, group_list,CL);
533            
534            SEQ_DOMAIN=copy_aln (A, SEQ_DOMAIN);
535            vfree (n_groups);
536            free_int (group_list,-1);
537            return SEQ_DOMAIN;
538            }
539            
540
541 /*********************************COPYRIGHT NOTICE**********************************/
542 /*© Centro de Regulacio Genomica */
543 /*and */
544 /*Cedric Notredame */
545 /*Tue Oct 27 10:12:26 WEST 2009. */
546 /*All rights reserved.*/
547 /*This file is part of T-COFFEE.*/
548 /**/
549 /*    T-COFFEE is free software; you can redistribute it and/or modify*/
550 /*    it under the terms of the GNU General Public License as published by*/
551 /*    the Free Software Foundation; either version 2 of the License, or*/
552 /*    (at your option) any later version.*/
553 /**/
554 /*    T-COFFEE is distributed in the hope that it will be useful,*/
555 /*    but WITHOUT ANY WARRANTY; without even the implied warranty of*/
556 /*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the*/
557 /*    GNU General Public License for more details.*/
558 /**/
559 /*    You should have received a copy of the GNU General Public License*/
560 /*    along with Foobar; if not, write to the Free Software*/
561 /*    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA*/
562 /*...............................................                                                                                      |*/
563 /*  If you need some more information*/
564 /*  cedric.notredame@europe.com*/
565 /*...............................................                                                                                                                                     |*/
566 /**/
567 /**/
568 /*      */
569 /*********************************COPYRIGHT NOTICE**********************************/