Next version of JABA
[jabaws.git] / binaries / src / tcoffee / t_coffee_source / util_dp_cdna_fasta_nw.c
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <math.h>
4 #include <stdarg.h>
5 #include <string.h>
6
7 #include "io_lib_header.h"
8 #include "util_lib_header.h"
9 #include "define_header.h"
10 #include "dp_lib_header.h"
11
12
13 int cfasta_cdna_pair_wise (Alignment *A,int*ns, int **l_s,Constraint_list *CL)
14     {
15
16 /*TREATMENT OF THE TERMINAL GAP PENALTIES*/
17 /*TG_MODE=0---> gop and gep*/
18 /*TG_MODE=1---> ---     gep*/
19 /*TG_MODE=2---> ---     ---*/
20         int maximise;
21                 
22 /*VARIABLES FOR THE MULTIPLE SEQUENCE ALIGNMENT*/
23         int **tot_diag;
24         
25         int *diag;
26         int ktup;
27         static int n_groups;
28         static char **group_list;
29         int score, new_score;
30         int n_chosen_diag=0;
31         int step;
32         int max_n_chosen_diag;
33         int l0, l1;
34         Alignment *B;
35         /********Prepare Penalties******/
36                 
37         maximise=CL->maximise;
38         ktup=CL->ktup;
39
40         /********************************/
41         if ( !group_list)
42            {
43              
44                group_list=make_group_aa (&n_groups, CL->matrix_for_aa_group);
45            }
46         B=dna_aln2_3frame_cdna_aln(A, ns, l_s);
47         l0=strlen(B->seq_al[0]);
48         l1=strlen(B->seq_al[3]);
49         tot_diag=evaluate_diagonals_cdna ( B, ns, l_s, CL, maximise,n_groups,group_list, ktup);
50         
51         max_n_chosen_diag=100;
52         n_chosen_diag=step=10 ;
53         n_chosen_diag=MIN(n_chosen_diag, max_n_chosen_diag);
54         
55         
56         diag=extract_N_diag (l0,l1, tot_diag, n_chosen_diag,2);
57         score    =make_fasta_cdna_pair_wise ( A,B, ns, l_s, CL, diag);
58         
59        
60         new_score=0;
61         vfree ( diag);
62         
63         while (new_score!=score && n_chosen_diag< max_n_chosen_diag )
64           {
65             
66             score=new_score;
67             ungap_sub_aln ( A, ns[0], l_s[0]);
68             ungap_sub_aln ( A, ns[1], l_s[1]);
69             
70             
71             n_chosen_diag+=step;
72             n_chosen_diag=MIN(n_chosen_diag, max_n_chosen_diag);
73             
74             diag     =extract_N_diag (l0,l1, tot_diag, n_chosen_diag,3); 
75             new_score=make_fasta_cdna_pair_wise (  A, B,ns, l_s, CL, diag);
76             vfree ( diag);
77           }
78         
79         score=new_score;
80         free_int (tot_diag, -1);
81         free_aln(B);
82         return score;
83     }
84
85
86 int fasta_cdna_pair_wise (Alignment *A,int*ns, int **l_s,Constraint_list *CL)
87     {
88 /*TREATMENT OF THE TERMINAL GAP PENALTIES*/
89 /*TG_MODE=0---> gop and gep*/
90 /*TG_MODE=1---> ---     gep*/
91 /*TG_MODE=2---> ---     ---*/
92
93
94         int maximise;
95         int l0, l1;     
96 /*VARIABLES FOR THE MULTIPLE SEQUENCE ALIGNMENT*/
97         int **tot_diag;
98         int *diag;
99         int ktup;
100         static int n_groups;
101         static char **group_list;
102         int score;
103         Alignment *B;
104
105         /********Prepare Penalties******/
106         
107         
108         maximise=CL->maximise;
109         ktup=CL->ktup;
110         /********************************/
111         
112
113         
114         if ( !group_list)
115            {
116              
117                group_list=make_group_aa (&n_groups, CL->matrix_for_aa_group);
118            }
119
120         B=dna_aln2_3frame_cdna_aln(A, ns, l_s);
121         B->nseq=6;
122         
123         l0=strlen ( B->seq_al[0]);
124         l1=strlen ( B->seq_al[3]);
125         
126
127         tot_diag=evaluate_diagonals_cdna( B, ns, l_s, CL, maximise,n_groups,group_list, ktup);
128
129         
130         diag=extract_N_diag (l0, l1, tot_diag,20,1);
131         score=make_fasta_cdna_pair_wise ( A,B, ns, l_s, CL, diag);
132         
133         free_aln(B);
134         free_int (tot_diag, -1);
135         vfree (diag);
136         return score;
137     }
138
139 Dp_Model* initialize_dna_dp_model (Constraint_list *CL)
140     {
141       Dp_Model *M;
142       int a, b, c,d;
143       int f0, f1;
144       int deltaf1, deltaf0,deltatype;
145       int type, type1, type0;
146             
147       M=vcalloc ( 1, sizeof (Dp_Model));
148      
149       for (M->nstate=0,f0=0; f0<3; f0++)
150               for ( f1=0; f1<3; f1++)M->nstate+=3;
151       
152       M->UM=M->nstate++;
153       M->START=M->nstate++;
154       M->END  =M->nstate++;
155
156       M->TG_MODE=CL->TG_MODE;
157       M->F_TG_MODE=0;
158       M->gop=CL->gop*SCORE_K;
159       M->gep=CL->gep*SCORE_K;
160
161       
162
163       M->f_gop=CL->f_gop*SCORE_K;
164       M->f_gep=CL->f_gep*SCORE_K;
165       
166
167       M->bounded_model=declare_int (M->nstate+1, M->nstate+1); 
168       M->model=declare_int (M->nstate+1, M->nstate+1); 
169       for ( a=0; a<=M->nstate; a++)
170         for ( b=0; b<= M->nstate; b++)
171           M->model[a][b]=UNDEFINED;
172       M->model_properties=declare_int ( M->nstate, 10);
173       
174       a=0;     
175       M->TYPE=a++;M->F0=a++;M->F1=a++; M->LEN_I=a++; M->LEN_J=a++; M->DELTA_I=a++;M->DELTA_J=a++;M->EMISSION=a++;M->TERM_EMISSION=a++;
176       a=M->nstate;
177       M->NON_CODING=a++; M->INSERTION=a++; M->DELETION=a++; M->CODING0=a++; M->CODING1=a++;M->CODING2=a++;
178       
179       
180       for ( a=0,f0=0; f0<3; f0++)
181               for ( f1=0; f1<3; f1++, a+=3)
182                 {
183                   M->model_properties[a+0][M->TYPE]=M->CODING0;
184                   M->model_properties[a+0][M->F0]=f0;
185                   M->model_properties[a+0][M->F1]=f1;
186                   M->model_properties[a+0][M->LEN_I]=1;
187                   M->model_properties[a+0][M->LEN_J]=1;
188                   M->model_properties[a+0][M->DELTA_I]=-1;
189                   M->model_properties[a+0][M->DELTA_J]= 0;      
190                   M->model_properties[a+0][M->EMISSION]=0;
191                   M->model_properties[a+0][M->TERM_EMISSION]=0;
192                   
193                   M->model_properties[a+1][M->TYPE]=M->DELETION;
194                   M->model_properties[a+1][M->F0]=f0;
195                   M->model_properties[a+1][M->F1]=f1;
196                   M->model_properties[a+1][M->LEN_I]=1;
197                   M->model_properties[a+1][M->LEN_J]=0;          
198                   M->model_properties[a+1][M->DELTA_I]=-1;
199                   M->model_properties[a+1][M->DELTA_J]=+1;
200                   M->model_properties[a+1][M->EMISSION]=M->gep;
201                   M->model_properties[a+1][M->TERM_EMISSION]=(M->TG_MODE==2)?0:M->gep;
202
203                   M->model_properties[a+2][M->TYPE]=M->INSERTION;
204                   M->model_properties[a+2][M->F0]=f0;
205                   M->model_properties[a+2][M->F1]=f1;
206                   M->model_properties[a+2][M->LEN_I]=0;
207                   M->model_properties[a+2][M->LEN_J]=1;
208                   M->model_properties[a+2][M->DELTA_I]= 0;
209                   M->model_properties[a+2][M->DELTA_J]=-1;
210                   M->model_properties[a+2][M->EMISSION]=M->gep;
211                   M->model_properties[a+2][M->TERM_EMISSION]=(M->TG_MODE==2)?0:M->gep;
212                 }
213
214       /*UM" Unmatched State*/
215       M->model_properties[a][M->TYPE]=M->NON_CODING;
216       M->model_properties[a][M->F0]=0;
217       M->model_properties[a][M->F1]=0;
218       M->model_properties[a][M->LEN_I]=1;
219       M->model_properties[a][M->LEN_J]=1;
220       M->model_properties[a][M->DELTA_I]=-1;
221       M->model_properties[a][M->DELTA_J]=0;
222       M->model_properties[a][M->EMISSION]=M->f_gep;
223       M->model_properties[a][M->TERM_EMISSION]=(M->F_TG_MODE==2)?0:M->f_gep;
224       
225
226       M->model_properties[M->START][M->TYPE]=M->NON_CODING;
227       M->model_properties[a+1][M->F0]=0;
228       M->model_properties[a+1][M->F1]=0;
229       M->model_properties[a+1][M->LEN_I]=0;
230       M->model_properties[a+1][M->LEN_J]=0;
231       M->model_properties[a+1][M->DELTA_I]=0 ;
232       M->model_properties[a+1][M->DELTA_J]=0;
233       M->model_properties[a+1][M->EMISSION]=0;
234       M->model_properties[a+1][M->TERM_EMISSION]=0;
235       
236       M->model_properties[M->END][M->TYPE]=M->NON_CODING;
237       M->model_properties[a+2][M->F0]=0;
238       M->model_properties[a+2][M->F1]=0;
239       M->model_properties[a+2][M->LEN_I]=0;
240       M->model_properties[a+2][M->LEN_J]=0;
241       M->model_properties[a+2][M->DELTA_I]=0 ;
242       M->model_properties[a+2][M->DELTA_J]=0;
243       M->model_properties[a+2][M->EMISSION]=0;
244       M->model_properties[a+2][M->TERM_EMISSION]=0;
245
246       /*1: SET THE INDEL PENALTIES*/
247      
248
249       for ( a=0; a< M->START; a++)
250               {
251                 deltaf0=M->model_properties[M->START][M->F0]-M->model_properties[a][M->F0];
252                 deltaf1=M->model_properties[M->START][M->F1]-M->model_properties[a][M->F1];
253                 if      ( deltaf0==0 && deltaf1==0)deltatype=0;
254                 else if ( deltaf0<=0 && deltaf1<=0)deltatype=1;
255                 else deltatype=-1;
256                 type=M->model_properties[a][M->TYPE];
257
258                 if      ( type==M->NON_CODING)               M->model[a][M->END]=M->model[M->START][a]=(M->F_TG_MODE==0)?M->f_gop:0;
259                 else if ( type==M->CODING0   && deltatype==0)M->model[a][M->END]=M->model[M->START][a]=ALLOWED;
260                 else if ( type==M->CODING0   && deltatype==1)M->model[a][M->END]=M->model[M->START][a]=(M->F_TG_MODE==0)?M->f_gop:0;
261                 else if ( type==M->INSERTION && deltatype==0)M->model[a][M->END]=M->model[M->START][a]=(M->TG_MODE==0)?M->gop:0;
262                 else if ( type==M->INSERTION && deltatype==1)M->model[a][M->END]=M->model[M->START][a]=(M->TG_MODE==0)?M->gop:0+(M->F_TG_MODE==0)?M->f_gop:0;
263                 else if ( type==M->DELETION  && deltatype==0)M->model[a][M->END]=M->model[M->START][a]=(M->TG_MODE==0)?M->gop:0;
264                 else if ( type==M->DELETION  && deltatype==1)M->model[a][M->END]=M->model[M->START][a]=(M->TG_MODE==0)?M->gop:0+(M->F_TG_MODE==0)?M->f_gop:0;
265                 else  M->model[a][M->END]=M->model[M->START][a]=UNDEFINED;
266
267                 /*
268                 if      (type==M->NON_CODING ||M->model_properties[a][M->F0] ||M->model_properties[a][M->F1]) M->model[M->START][a]=M->model[a][M->END]=(M->F_TG_MODE==0)?M->f_gop:0;
269                 else if (type==M->INSERTION || type==M->DELETION)M->model[M->START][a]=M->model[a][M->END]=(  M->TG_MODE==0)?M->gop:0;
270                 else     M->model[M->START][a]=M->model[a][M->END]=ALLOWED;
271                 */
272                 
273                 for ( b=0; b< M->START; b++)
274                   {
275                     
276                     deltaf0=M->model_properties[a][M->F0]-M->model_properties[b][M->F0];
277                     deltaf1=M->model_properties[a][M->F1]-M->model_properties[b][M->F1];
278                     type0=M->model_properties[a][M->TYPE];
279                     type1=M->model_properties[b][M->TYPE];
280                     
281                     if      ( deltaf0==0 && deltaf1==0)deltatype=0;
282                     else if ( deltaf0<=0 && deltaf1<=0)deltatype=1;
283                     else deltatype=-1;
284                     
285                    
286                     
287                     if      ( type0==M->NON_CODING && type1==M->NON_CODING                 )M->model[a][b]=UNDEFINED;
288                     else if ( type0==M->NON_CODING && type1==M->CODING0                    )M->model[a][b]=ALLOWED ;
289                     else if ( type0==M->NON_CODING && type1==M->INSERTION                  )M->model[a][b]=M->gop;
290                     else if ( type0==M->NON_CODING && type1==M->DELETION                   )M->model[a][b]=M->gop;
291                     
292                     else if ( type0==M->CODING0   && type1==M->NON_CODING                 )M->model[a][b]=M->f_gop;
293                     else if ( type0==M->CODING0   && type1==M->CODING0   &&  deltatype==0 )M->model[a][b]=ALLOWED;
294                     else if ( type0==M->CODING0   && type1==M->CODING0   &&  deltatype==1 )M->model[a][b]=M->f_gop;                 
295                     else if ( type0==M->CODING0   && type1==M->INSERTION &&  deltatype==0 )M->model[a][b]=M->gop;
296                     else if ( type0==M->CODING0   && type1==M->INSERTION &&  deltatype==1 )M->model[a][b]=M->gop+M->f_gop;                  
297                     else if ( type0==M->CODING0   && type1==M->DELETION  &&  deltatype==0 )M->model[a][b]=M->gop;
298                     else if ( type0==M->CODING0   && type1==M->DELETION  &&  deltatype==1 )M->model[a][b]=M->gop+M->f_gop;
299                     
300                     else if ( type0==M->INSERTION && type1==M->NON_CODING                 )M->model[a][b]=M->f_gop;
301                     else if ( type0==M->INSERTION && type1==M->CODING0   &&  deltatype==0 )M->model[a][b]=ALLOWED;
302                     else if ( type0==M->INSERTION && type1==M->CODING0   &&  deltatype==1 )M->model[a][b]=M->f_gop;                 
303                     else if ( type0==M->INSERTION && type1==M->INSERTION &&  deltatype==0 )M->model[a][b]=ALLOWED;
304                     else if ( type0==M->INSERTION && type1==M->INSERTION &&  deltatype==1 )M->model[a][b]=M->f_gop;                 
305                     else if ( type0==M->INSERTION && type1==M->DELETION  &&  deltatype==0 )M->model[a][b]=M->gop;
306                     else if ( type0==M->INSERTION && type1==M->DELETION  &&  deltatype==1 )M->model[a][b]=M->gop+M->f_gop;
307                     
308                     else if ( type0==M->DELETION  && type1==M->NON_CODING                 )M->model[a][b]=M->f_gop;
309                     else if ( type0==M->DELETION  && type1==M->CODING0   &&  deltatype==0 )M->model[a][b]=ALLOWED;
310                     else if ( type0==M->DELETION  && type1==M->CODING0   &&  deltatype==1 )M->model[a][b]=M->f_gop;
311                     else if ( type0==M->DELETION  && type1==M->INSERTION &&  deltatype==0 )M->model[a][b]=M->gop;
312                     else if ( type0==M->DELETION  && type1==M->INSERTION &&  deltatype==1 )M->model[a][b]=M->gop+M->f_gop;
313                     else if ( type0==M->DELETION  && type1==M->DELETION  &&  deltatype==0 )M->model[a][b]=ALLOWED;
314                     else if ( type0==M->DELETION  && type1==M->DELETION  &&  deltatype==1 )M->model[a][b]=M->f_gop;
315                     
316                     else {M->model[a][b]=UNDEFINED;}
317                     
318                   }
319               }
320     
321                
322       /*2 SET THE FRAMESHIFT PENALTIES
323                 
324       for ( a=0; a< M->START; a++)
325               {
326                 type=M->model_properties[a][M->TYPE];
327                 
328                 for ( b=0; b< M->START; b++)
329                   {
330                     deltaf0=M->model_properties[a][M->F0]-M->model_properties[b][M->F0];
331                     deltaf1=M->model_properties[a][M->F1]-M->model_properties[b][M->F1];
332                    
333                     
334                     
335                     
336                     if (b==M->UM)                M->model[a][b]+=M->f_gop;
337                     else if (a==M->UM)                M->model[a][b]+=ALLOWED;
338                     else if (deltaf1==0 && deltaf0==0)M->model[a][b]+=ALLOWED;
339                     else if (deltaf1<=0 && deltaf0<=0)M->model[a][b]+=M->f_gop;
340                     else M->model[a][b]=UNDEFINED;
341                   }
342                 
343               }         
344       M->model[M->UM][M->UM]=UNDEFINED;
345       */
346      
347
348       for (c=0,a=0, d=0; a< M->START; a++)
349         for ( b=0; b<M->START; b++, d++)
350           {
351             if (M->model[a][b]!=UNDEFINED)
352               {
353                 M->bounded_model[b][1+M->bounded_model[b][0]++]=a;
354                 c++;
355               }
356           }
357       return M;
358     }
359 int make_fasta_cdna_pair_wise (Alignment *B,Alignment *A,int*in_ns, int **l_s,Constraint_list *CL, int *diag)
360     {
361       int a,c,p,k;
362       Dp_Result *DPR;
363       static Dp_Model  *M;
364       int l0, l1;
365       int len_i, len_j;
366       int f0=0, f1=0;
367       int deltaf0, deltaf1, delta;
368       int nr1, nr2;
369       int ala, alb, aa0, aa1;
370       int type;
371       
372       char **al;
373       int **tl_s;
374       int *tns;
375       /*DEBUG*/
376       int debug_cdna_fasta=0;
377       Alignment *DA;
378       int score;
379       int state,prev_state;
380       int t, e;
381       int a1, a2;
382       
383       
384       l0=strlen ( B->seq_al[l_s[0][0]]);
385       l1=strlen ( B->seq_al[l_s[1][0]]);
386
387       al=declare_char (2, l0+l1+1); 
388       B=realloc_aln2 (B,B->nseq,l0+l1+1);
389
390
391       free_int (B->cdna_cache, -1);
392       B->cdna_cache=declare_int(1, l0+l1+1);
393       
394       if ( !M)M=initialize_dna_dp_model (CL);
395
396      
397       M->diag=diag;
398
399       tl_s=declare_int (2, 2);tns=vcalloc(2, sizeof(int));tl_s[0][0]=0;tl_s[1][0]=3;tns[0]=tns[1]=1;
400       DPR=make_fast_dp_pair_wise (A,tns, tl_s,CL,M);
401       vfree(tns);free_int(tl_s, -1);
402
403
404       
405       /*new_trace_back*/
406       a=p=0;
407       aa0=aa1=ala=alb=0;
408       while ( (k=DPR->traceback[a++])!=M->START);
409       while ( (k=DPR->traceback[a++])!=M->END)
410         {
411           
412           f0=M->model_properties[k][M->F0];
413           f1=M->model_properties[k][M->F1];
414
415           len_i=M->model_properties[k][M->LEN_I];
416           len_j=M->model_properties[k][M->LEN_J];
417           
418           type=M->model_properties[k][M->TYPE];
419           
420           
421
422           if (type==M->CODING0)
423             {
424               deltaf0=(aa0*3+f0)-ala;
425               deltaf1=(aa1*3+f1)-alb;
426
427               delta=MAX(deltaf0, deltaf1);
428               
429               for (nr1=0, nr2=0,c=0; c<delta; c++, nr1++, nr2++,p++)              
430                       {
431                         if (nr1<deltaf0 && ala<l0)al[0][p]=B->seq_al[l_s[0][0]][ala++];
432                         else al[0][p]='-';
433                         
434                         if (nr2<deltaf1 && alb<l1)al[1][p]=B->seq_al[l_s[1][0]][alb++];
435                         else al[1][p]='-'; 
436                         
437                         B->cdna_cache[0][p]=M->NON_CODING;      
438                         if ( is_gap(al[1][p]) && is_gap(al[0][p]))p--;
439                         else if ( debug_cdna_fasta)fprintf (stderr, "\nUM: %c %c",  al[0][p], al[1][p]);
440                       } 
441               for ( c=0; c< 3; c++, p++)
442                 {
443                   if ( c==0)B->cdna_cache[0][p]=M->CODING0;
444                   else if ( c==1)B->cdna_cache[0][p]=M->CODING1;
445                   else if ( c==2)B->cdna_cache[0][p]=M->CODING2;
446                   if (ala<l0)al[0][p]=B->seq_al[l_s[0][0]][ala++];
447                   else al[0][p]='-';
448
449                   if (alb<l1)al[1][p]=B->seq_al[l_s[1][0]][alb++];
450                   else al[1][p]='-';
451                         
452                   if ( is_gap(al[1][p]) && is_gap(al[0][p]))p--;
453                   else if ( debug_cdna_fasta)fprintf (stderr, "\n%d: %c %c",k,  al[0][p], al[1][p]);
454                 }
455             }
456
457           aa0+=len_i;
458           aa1+=len_j;
459         }
460       
461       deltaf0=(aa0*3+f0)-ala;
462       deltaf1=(aa1*3+f1)-alb;
463       delta=MAX(deltaf0, deltaf1);
464       for (nr1=0, nr2=0,c=0; c<delta; c++, nr1++, nr2++,p++)              
465         {
466           if (nr1<deltaf0 && ala<l0)al[0][p]=B->seq_al[l_s[0][0]][ala++];
467           else al[0][p]='-';
468           
469           if (nr2<deltaf1 && alb<l1)al[1][p]=B->seq_al[l_s[1][0]][alb++];
470           else al[1][p]='-'; 
471           
472           B->cdna_cache[0][p]=M->NON_CODING;    
473           if ( is_gap(al[1][p]) && is_gap(al[0][p]))p--;
474           else if ( debug_cdna_fasta)fprintf (stderr, "\nUM: %c %c",  al[0][p], al[1][p]);
475         }
476       
477
478       /*End New traceback*/
479       
480
481
482
483       al[0][p]='\0';
484       al[1][p]='\0';
485
486
487       sprintf( B->seq_al[l_s[0][0]], "%s", al[0]);
488       sprintf( B->seq_al[l_s[1][0]], "%s", al[1]);
489       B->len_aln=strlen (al[0]);
490       B->nseq=2;
491      
492       
493      
494       
495       if ( debug_cdna_fasta)
496           {
497             fprintf ( stderr, "\nA-A=%d, %d", CL->M['a'-'A']['a'-'A'], CL->M['a'-'A']['a'-'A'] *SCORE_K);
498             for ( a=1; a<diag[0]; a++)
499               {
500                 fprintf ( stderr, "\nchosen diag: %d", diag[a]);
501               }
502             
503             fprintf ( stderr, "\n  GOP=%d   GEP=%d   TG_MODE=%d", M->gop, M->gep, M->TG_MODE);
504             fprintf ( stderr, "\nF_GOP=%d F_GEP=%d F_TG_MODE=%d", M->gop, M->gep, M->F_TG_MODE);
505             
506             DA=copy_aln (B, NULL);
507             DA=realloc_aln2 (DA,6,(DA->len_aln+1));
508         
509
510             for ( a=0; a<B->len_aln; a++)
511               {
512
513                 fprintf ( stderr, "\n%d", DA->cdna_cache[0][a]);
514                 if (DA->cdna_cache[0][a]>=M->CODING0)DA->seq_al[DA->nseq][a]=DA->cdna_cache[0][a]-M->nstate+'0';
515                 else DA->seq_al[DA->nseq][a]=DA->cdna_cache[0][a]-M->nstate+'0';
516
517                 if (DA->cdna_cache[0][a]==M->CODING0)
518                   {
519                     DA->seq_al[DA->nseq+1][a]=translate_dna_codon (DA->seq_al[0]+a,'*');
520                     DA->seq_al[DA->nseq+2][a]=translate_dna_codon (DA->seq_al[1]+a,'*');
521                   }
522                 else
523                   {
524                     DA->seq_al[DA->nseq+1][a]='-'; 
525                     DA->seq_al[DA->nseq+2][a]='-'; 
526                   }
527                 
528               }
529             DA->nseq+=3;
530             print_aln (DA);
531             
532             free_aln(DA);                     
533             score=0;
534             
535             
536             for (prev_state=M->START,a=0; a< DA->len_aln;)
537               {
538                 state=DA->cdna_cache[0][a];
539                 t=M->model[prev_state][state];
540                 if ( DA->cdna_cache[0][a]==M->CODING0)
541                   {
542                     a1=translate_dna_codon (A->seq_al[0]+a,'x');
543                     a2=translate_dna_codon (A->seq_al[1]+a,'x');
544                     
545                     if ( a1!='x' && a2!='x')
546                       {
547                         e=CL->M[a1-'A'][a2-'A']*SCORE_K;
548                       }
549                   }
550                 else if ( DA->cdna_cache[0][a]>M->CODING0);
551                 else
552                   {
553                     e=M->model_properties[B->cdna_cache[0][a]][M->EMISSION];
554                   }
555                 if ( e==UNDEFINED || t==UNDEFINED) fprintf ( stderr, "\nPROBLEM %d\n", a);
556                 
557                 fprintf ( stderr, "\n[%c..%c: %d(e)+%d(t)=%d]", A->seq_al[0][a], A->seq_al[1][a], e,t,e+t);
558                 score+=e+t;
559                 prev_state=state;
560                 
561                 if (B->cdna_cache[0][a]==M->NON_CODING)a++;
562                 else a+=3;
563                 
564               }
565             
566           }
567       
568       for ( a=0; a<B->len_aln; a++)
569         {
570           
571           if ( B->cdna_cache[0][a]<M->CODING0)B->cdna_cache[0][a]=0;
572           else B->cdna_cache[0][a]=1;
573         }
574       
575       free_char ( al, -1);
576       return DPR->score;
577       
578     }
579                 
580
581
582 Dp_Result * make_fast_dp_pair_wise (Alignment *A,int*ns, int **l_s, Constraint_list *CL,Dp_Model *M)
583         {
584           
585           /*SIZE VARIABLES*/ 
586           
587           int ndiag;
588           int l0, l1, len_al,len_diag;
589           static int max_len_al, max_len_diag;
590           static int mI, mJ;
591          
592           
593           /*EVALUATION*/
594           int **mat;
595           int a1, a2;
596           
597           /*DP VARIABLES*/
598           static int *Mat, *LMat, *trace;
599           int a, i, j,l;
600           int state, cur_state, prev_state;
601           int pos_i,  pos_j;
602           int last_i=0, last_j=0;
603           int prev_i, prev_j;
604           int len_i, len_j, len;
605           int t, e, em;
606           
607           int prev_score; 
608           int pc, best_pc;
609           
610           int *prev;
611           int model_index;
612           /*TRACEBACK*/
613           Dp_Result *DPR;
614           int k=0, next_k;
615           int new_i, new_j;
616           
617           
618           ndiag=M->diag[0];
619
620           l0=strlen (A->seq_al[l_s[0][0]]);
621           l1=strlen (A->seq_al[l_s[1][0]]);
622           len_al =l0+l1+1;      
623           len_diag=ndiag+4;
624           
625           if ( (len_al>max_len_al || len_diag>max_len_diag))
626             {
627               
628               vfree (Mat);
629               vfree (LMat);
630               vfree(trace);         
631               max_len_diag=max_len_al=0;           
632             }
633           
634           if (max_len_al==0)
635             {
636               max_len_al=len_al;
637               max_len_diag=len_diag;
638               mI=max_len_al*max_len_diag;
639               mJ=max_len_diag;
640               
641               
642               Mat  =vcalloc ( M->nstate*max_len_al*max_len_diag, sizeof (int));
643               LMat =vcalloc ( M->nstate*max_len_al*max_len_diag, sizeof (int));
644               trace=vcalloc ( M->nstate*max_len_al*max_len_diag, sizeof (int));
645               
646             }
647           
648           prev=vcalloc ( M->nstate, sizeof (int));
649           DPR=vcalloc ( 1, sizeof ( Dp_Result));
650           DPR->traceback=vcalloc (max_len_al, sizeof (int));
651           
652 /*PREPARE THE EVALUATION*/      
653           if (ns[0]+ns[1]>2)
654             {
655               fprintf ( stderr, "\nERROR: function make_fasta_cdna_pair_wise can only handle two sequences at a time [FATAL:%s]",PROGRAM);
656               crash ("");
657             }
658           mat=CL->M;                                                    
659
660 /*INITIALIZATION OF THE DP MATRICES*/
661
662         for (i=0; i<=l0;i++)
663           {                                             
664             for (j=0; j<=ndiag;j++)
665               {
666                 for ( state=0; state<M->nstate; state++)
667                   {
668                     Mat   [state*mI+i*mJ+j]=UNDEFINED;
669                     LMat  [state*mI+i*mJ+j]=UNDEFINED;
670                     trace [state*mI+i*mJ+j]=M->START;
671                   }
672               }
673           }     
674
675         M->diag[0]=0;
676
677         for (i=0; i<=l0; i++)
678           for ( j=0; j<=ndiag; j++)
679             {
680               pos_j=M->diag[j]-l0+i;
681               pos_i=i;
682               if (!(pos_j==0 || pos_i==0))continue;
683               if ( pos_j<0 || pos_i<0)continue;
684               if ( pos_i==0 && pos_j==0)
685                   {
686                   for ( a=0; a< M->nstate; a++)
687                     {
688                      Mat  [a*mI+i*mJ+j]=0;
689                      LMat [a*mI+i*mJ+j]=0;
690                      trace[a*mI+i*mJ+j]=M->START;
691                     }
692                 }
693               else
694                 {       
695                   l=MAX(pos_i,pos_j);
696                   for ( state=0; state<M->START; state++)
697                     {                
698                       if (pos_j==0 && M->model_properties[state][M->LEN_J])continue;
699                       if (pos_i==0 && M->model_properties[state][M->LEN_I])continue;
700                      
701                      
702                      t=M->model[M->START][state];
703                      e=M->model_properties[state][M->TERM_EMISSION];
704                      Mat   [state*mI+i*mJ+j]=t+e*l;
705                      LMat  [state*mI+i*mJ+j]=l;
706                      trace [state*mI+i*mJ+j]=M->START;
707                     }
708                 }
709             }
710
711 /*DYNAMIC PROGRAMMING: Forward Pass*/
712
713         
714
715         for (i=1; i<=l0;i++)
716           {                                             
717             for (j=1; j<=ndiag;j++)
718               {
719                 pos_j=M->diag[j]-l0+i;
720                 pos_i=i;
721                 
722                 if (pos_j<=0 || pos_j>l1 )continue;
723                 last_i=i;
724                 last_j=j;
725                 
726                 for (cur_state=0; cur_state<M->START; cur_state++)
727                   {
728                     if (M->model_properties[cur_state][M->DELTA_J])
729                       {
730                         prev_j=j+M->model_properties[cur_state][M->DELTA_J];
731                         prev_i=i+M->model_properties[cur_state][M->DELTA_I]*FABS((M->diag[j]-M->diag[prev_j]));                 
732                       }
733                     else
734                       {
735                         prev_j=j;
736                         prev_i=i+M->model_properties[cur_state][M->DELTA_I];
737                       }
738                     len_i=FABS((i-prev_i));
739                     len_j=FABS((M->diag[prev_j]-M->diag[j]));
740                     len=MAX(len_i, len_j);
741                     a1=A->seq_al[M->model_properties[cur_state][M->F0]  ][pos_i-1];
742                     a2=A->seq_al[M->model_properties[cur_state][M->F1]+3][pos_j-1];
743                 
744                     if (M->model_properties[cur_state][M->TYPE]==M->CODING0)
745                       {
746                         if ( a1=='o' || a2=='o')em=-(mat['w'-'A']['w'-'A'])*SCORE_K;
747                         else if (a1=='x' || a2=='x')em=UNDEFINED;
748                         else if ( a1==0 || a2==0)exit (0);
749                         else 
750                           {
751                             em=(mat[a1-'A'][a2-'A'])*SCORE_K;
752                           }
753                       }
754                     else
755                       {
756                         em=M->model_properties[cur_state][M->EMISSION];
757                       }
758                     
759                     
760                    
761                     for (pc=best_pc=UNDEFINED, model_index=1; model_index<=M->bounded_model[cur_state][0]; model_index++)
762                       {
763                         prev_state=M->bounded_model[cur_state][model_index];
764                         
765                         if(prev_i<0 || prev_j<0 ||prev_i>l0 || prev_j>ndiag || len==UNDEFINED)prev_score=UNDEFINED;
766                         else prev_score=Mat[prev_state*mI+prev_i*mJ+prev_j];
767                         t=M->model[prev_state][cur_state];                      
768                         e=em;
769                 
770                         if   (prev_score==UNDEFINED || len==UNDEFINED)e=UNDEFINED;                      
771                         else if (len==0|| e==UNDEFINED)e=UNDEFINED;
772                         else e=e*len;
773                         
774                         if (is_defined_int(3,prev_score,e, t))
775                           {
776                             pc=prev_score+t+e;
777                           }
778                         else  pc=UNDEFINED;
779                         
780                         /*Identify the best previous score*/
781                         if (best_pc==UNDEFINED || (pc>best_pc && pc!=UNDEFINED))
782                           {
783                             prev[cur_state]=prev_state;
784                             best_pc=pc;
785                            
786                           }
787                       }
788                     
789                     Mat[cur_state*mI+i*mJ+j]=best_pc;
790                    
791
792
793                     if ( Mat[cur_state*mI+i*mJ+j]==UNDEFINED)
794                       {
795                         LMat[cur_state*mI+i*mJ+j]=UNDEFINED;
796                         trace[cur_state*mI+i*mJ+j]=UNDEFINED;
797                         continue;
798                       }
799                     
800                     else if ( prev[cur_state]==cur_state)
801                       {
802                         LMat [cur_state*mI+i*mJ+j]=     LMat [cur_state*mI+prev_i*mJ+prev_j]+len;
803                         trace[cur_state*mI+i*mJ+j]=     trace[cur_state*mI+prev_i*mJ+prev_j];
804                       }
805                     else
806                       {
807                         LMat[cur_state*mI+i*mJ+j]=len;
808                         trace[cur_state*mI+i*mJ+j]=prev[cur_state];
809                       }
810                   }
811               }
812           }
813         
814         
815         i=last_i;
816         j=last_j;
817         for (pc=best_pc=UNDEFINED, state=0; state<M->START; state++)
818           {
819             t=M->model[state][M->END];
820             e=M->model_properties[state][M->TERM_EMISSION];
821             l=LMat[state*mI+i*mJ+j];
822             
823            
824             if (!is_defined_int(4,t,e,Mat[state*mI+i*mJ+j],l))Mat[state*mI+i*mJ+j]=UNDEFINED;
825             else Mat[state*mI+i*mJ+j]+=t+e*(l);
826             pc=Mat[state*mI+i*mJ+j];
827             
828            
829             if (best_pc==UNDEFINED || (pc>best_pc && pc!=UNDEFINED))
830               {
831                 k=state;
832                 best_pc=pc;
833               }
834           }
835          DPR->score=best_pc;
836         
837 /*TRACEBACK*/ 
838
839
840         e=0;
841         len=0;    
842         
843         
844         while (k!=M->START)
845           {
846             next_k=trace[k*mI+i*mJ+j];
847             new_i=i;
848             new_j=j;
849             l=LMat[k*mI+i*mJ+j];
850                    
851             for (a=0; a< l; a++)
852               {
853                 DPR->traceback[len++]=k;
854               }
855            new_i+=M->model_properties[k][M->DELTA_I]*l;
856            
857            
858            if ( M->model_properties[k][M->DELTA_J])
859              {
860                while ( next_k!=M->START && FABS((M->diag[j]-M->diag[new_j]))!=l)new_j+=M->model_properties[k][M->DELTA_J];
861              }
862
863            i=new_i;
864            j=new_j;
865            k=next_k;
866           }
867         DPR->len=len;
868         DPR->traceback[DPR->len++]=M->START;
869         invert_list_int  (DPR->traceback,DPR->len);
870         DPR->traceback[DPR->len]=M->END;
871         
872         vfree (prev);
873
874         return DPR;
875         
876
877         }
878
879
880
881 int ** evaluate_diagonals_cdna ( Alignment *B, int *ns, int **l_s, Constraint_list *CL,int maximise,int n_groups, char **group_list, int ktup)
882         {
883           int f1, f2, c;
884           int **diag;
885           char *s1, *s2;
886           int p1, p2;
887           int **tot_diag;
888           int n_tot_diag;
889           int l0, l1;
890           
891           
892          
893           
894           
895          
896           if ( ns[0]!=1 || ns[1]!=1)
897             {
898               fprintf ( stderr, "\nERROR 2 SEQUENCES ONLY [FATAL:%s", PROGRAM);
899               crash ("");
900             }
901           
902           
903           
904           
905         
906         l0=strlen ( B->seq_al[0]);
907         l1=strlen ( B->seq_al[3]);
908         n_tot_diag=(l0+l1-1);
909
910         tot_diag=declare_int ( n_tot_diag+1, 2);
911         for ( c=0; c<= n_tot_diag; c++)tot_diag[c][0]=c; 
912           
913         for (f1=0; f1< 3; f1++)
914             {
915               for ( f2=0; f2< 3; f2++)
916                 {
917                   s1=B->seq_al[f1];
918                   s2=B->seq_al[3+f2];
919                   
920                   
921                   p1=strlen (s1);
922                   p2=strlen (s2);
923
924
925                   diag=evaluate_diagonals_for_two_sequences( s1, s2, maximise,NULL,ktup);
926                   for (c=1; c<=(p1+p2-1); c++)
927                     { 
928                       tot_diag[diag[c][0]][1]+=diag[c][1]*diag[c][1];
929                     }
930                   free_int (diag, -1);
931                   
932                 }
933             }
934         
935         
936
937           sort_int (tot_diag+1, 2, 1,0, n_tot_diag-1);    
938           
939           return tot_diag;
940           
941         }
942           
943
944
945
946 /*********************************COPYRIGHT NOTICE**********************************/
947 /*© Centro de Regulacio Genomica */
948 /*and */
949 /*Cedric Notredame */
950 /*Tue Oct 27 10:12:26 WEST 2009. */
951 /*All rights reserved.*/
952 /*This file is part of T-COFFEE.*/
953 /**/
954 /*    T-COFFEE is free software; you can redistribute it and/or modify*/
955 /*    it under the terms of the GNU General Public License as published by*/
956 /*    the Free Software Foundation; either version 2 of the License, or*/
957 /*    (at your option) any later version.*/
958 /**/
959 /*    T-COFFEE is distributed in the hope that it will be useful,*/
960 /*    but WITHOUT ANY WARRANTY; without even the implied warranty of*/
961 /*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the*/
962 /*    GNU General Public License for more details.*/
963 /**/
964 /*    You should have received a copy of the GNU General Public License*/
965 /*    along with Foobar; if not, write to the Free Software*/
966 /*    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA*/
967 /*...............................................                                                                                      |*/
968 /*  If you need some more information*/
969 /*  cedric.notredame@europe.com*/
970 /*...............................................                                                                                                                                     |*/
971 /**/
972 /**/
973 /*      */
974 /*********************************COPYRIGHT NOTICE**********************************/