JWS-112 Bumping version of T-Coffee to version 11.00.8cbe486.
[jabaws.git] / binaries / src / tcoffee / t_coffee_source / dp_lib / util_dp_gotoh_sw.c
1 /******************************COPYRIGHT NOTICE*******************************/
2 /*  (c) Centro de Regulacio Genomica                                                        */
3 /*  and                                                                                     */
4 /*  Cedric Notredame                                                                        */
5 /*  12 Aug 2014 - 22:07.                                                                    */
6 /*All rights reserved.                                                                      */
7 /*This file is part of T-COFFEE.                                                            */
8 /*                                                                                          */
9 /*    T-COFFEE is free software; you can redistribute it and/or modify                      */
10 /*    it under the terms of the GNU General Public License as published by                  */
11 /*    the Free Software Foundation; either version 2 of the License, or                     */
12 /*    (at your option) any later version.                                                   */
13 /*                                                                                          */
14 /*    T-COFFEE is distributed in the hope that it will be useful,                           */
15 /*    but WITHOUT ANY WARRANTY; without even the implied warranty of                        */
16 /*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the                         */
17 /*    GNU General Public License for more details.                                          */
18 /*                                                                                          */
19 /*    You should have received a copy of the GNU General Public License                     */
20 /*    along with Foobar; if not, write to the Free Software                                 */
21 /*    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA             */
22 /*...............................................                                           */
23 /*  If you need some more information                                                       */
24 /*  cedric.notredame@europe.com                                                             */
25 /*...............................................                                           */
26 /******************************COPYRIGHT NOTICE*******************************/
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <math.h>
30 #include <stdarg.h>
31 #include <string.h>
32
33 #include "io_lib_header.h"
34 #include "util_lib_header.h"
35 #include "define_header.h"
36
37 #include "dp_lib_header.h"
38
39 int gotoh_pair_wise_lalign ( Alignment *A, int*ns, int **l_s,Constraint_list *CL)
40     {
41     Alignment *BUF=NULL;
42     Alignment *EA=NULL;
43     
44     int a;
45     BUF=copy_aln (A, BUF);
46     
47     
48     for ( a=0; a<CL->lalign_n_top; a++)
49       {
50         free_aln (A);
51
52         A=copy_aln (BUF, A);
53         
54         A->score_aln=gotoh_pair_wise_sw (A, ns, l_s, CL);
55         EA=fast_coffee_evaluate_output (A, CL);
56
57         output_format_aln (CL->out_aln_format[0],A,EA,"stdout");
58         CL=undefine_sw_aln ( A, CL);
59       }
60     exit (1);
61     return 0;
62     }
63 Constraint_list * undefine_sw_aln ( Alignment *A, Constraint_list *CL)
64   {
65     int a, b, l;
66     int **pos;
67     int  r1, rs1;
68     int  r2, rs2;
69     
70
71
72     pos=aln2pos_simple ( A,A->nseq);
73     
74     for ( l=0; l< A->len_aln; l++)
75       for ( a=0; a< A->nseq-1; a++)
76         {
77           rs1=A->order[a][0];
78           r1 =pos[a][l];
79                 
80           if ( r1<=0)continue;
81           for ( b=a+1; b< A->nseq;b++)
82                   {
83                     rs2=A->order[b][0];
84                     r2 =pos[b][l];
85                     if ( r2<=0)continue;
86                     
87                     CL=undefine_sw_pair ( CL, rs1, r1, rs2, r2);
88                   }
89         }
90     free_int (pos, -1);
91     return CL;
92   }
93 Constraint_list * undefine_sw_pair ( Constraint_list *CL, int s1, int r1, int s2, int r2)
94   {
95     int a, b;
96     
97     if ( !CL->forbiden_pair_list)
98       {
99         
100         CL->forbiden_pair_list=(int ****)vcalloc ( (CL->S)->nseq, sizeof (int ***));
101         for ( a=0; a< ((CL->S)->nseq); a++)
102           {
103             CL->forbiden_pair_list[a]=(int ***)vcalloc ( (CL->S)->nseq, sizeof (int **));
104             for ( b=0; b< ((CL->S)->nseq); b++)
105               CL->forbiden_pair_list[a][b]=(int **)vcalloc ( (CL->S)->len[a]+1, sizeof (int *));
106             
107           }
108       }
109     if ( CL->forbiden_pair_list[s1][s2][r1]==NULL)CL->forbiden_pair_list[s1][s2][r1]=(int *)vcalloc ( (CL->S)->len[s2]+1, sizeof (int));
110     CL->forbiden_pair_list[s1][s2][r1][r2]=1;
111     
112     if ( CL->forbiden_pair_list[s2][s1][r2]==NULL)CL->forbiden_pair_list[s2][s1][r2]=(int *)vcalloc ( (CL->S)->len[s1]+1, sizeof (int));
113     CL->forbiden_pair_list[s2][s1][r2][r1]=1;
114     
115     return CL;
116   }
117        
118 int sw_pair_is_defined ( Constraint_list *CL, int s1, int r1, int s2, int r2)
119         {
120           int d;
121           
122           d=(r1-r2);
123           d=(d<0)?-d:d;
124           
125
126           if ( s1==s2 && d<(CL->sw_min_dist)) return UNDEFINED;
127           else if ( ! CL->forbiden_pair_list) return 1;
128           else if ( CL->forbiden_pair_list[s1][s2][r1]==NULL)return 1;
129           else if ( CL->forbiden_pair_list[s1][s2][r1][r2]==1)return UNDEFINED;
130           else if ( CL->forbiden_pair_list[s1][s2][r1][r2]==0)return 1;
131           
132           else 
133             {
134               crash ("ERROR in function: sw_pair_is_defined\n");
135               return UNDEFINED;
136             }
137             
138         }       
139
140   
141 int gotoh_pair_wise_sw (Alignment *A, int*ns, int **l_s,Constraint_list *CL)
142         {
143 /*******************************************************************************/
144 /*                SMITH AND WATERMAN                                           */
145 /*                                                                             */
146 /*      makes DP between the the ns[0] sequences and the ns[1]                 */
147 /*                                                                             */
148 /*      for MODE, see the function  get_dp_cost                                */
149 /*******************************************************************************/
150         int a, b, d, i, j;
151         int last_i, last_j;
152         int t;
153         int *cc;
154         int *dd, *ddg;
155         int lenal[2], len;
156         
157         int c,s, e,eg, ch,g,h, maximise;
158         int sub;
159         
160         int fop;
161         static int **pos0;
162         
163         char **al=NULL;
164         char **aln=NULL;
165
166         
167         int ala, alb,LEN;
168         char *buffer;
169         char *char_buf;
170         
171 /*trace back variables       */
172         int best_i;
173         int best_j;
174         int best_score;
175         
176         
177         FILE       *long_trace=NULL;
178         TRACE_TYPE *buf_trace=NULL;
179         static TRACE_TYPE **trace;
180         TRACE_TYPE k;
181         TRACE_TYPE *tr;
182         int long_trace_flag=0;
183         int dim;
184 /********Prepare penalties*******/
185         if (CL->moca)
186           {
187             g=(CL->gop+(CL->moca)->moca_scale)*SCORE_K;
188             h=(CL->gep+(CL->moca)->moca_scale)*SCORE_K;
189           }
190         else
191           {
192             g=(CL->gop-CL->nomatch)*SCORE_K;
193             h=(CL->gep-CL->nomatch)*SCORE_K;
194           }
195         fprintf ( stderr, "\n%d %d", g, h);
196         maximise=CL->maximise;
197 /********************************/      
198 /*CLEAN UP AFTER USE*/
199         if ( A==NULL)
200            {
201            free_int (trace,-1);
202            trace=NULL;
203            if ( al)free_char (al,-1);
204            al=NULL;
205            return 0;
206            }       
207 /*DO MEMORY ALLOCATION FOR SW DP*/
208         
209         lenal[0]=strlen (A->seq_al[l_s[0][0]]);
210         lenal[1]=strlen (A->seq_al[l_s[1][0]]);
211         len= (( lenal[0]>lenal[1])?lenal[0]:lenal[1])+1;
212         buf_trace=(int*)vcalloc ( len, sizeof (TRACE_TYPE));    
213         buffer=(char*)vcalloc ( 2*len, sizeof (char));  
214         al=declare_char (2, 2*len);  
215         
216         char_buf=(char*) vcalloc (2*len, sizeof (char));        
217         dd= (int*) vcalloc (len, sizeof (int));
218         cc=(int*) vcalloc (len, sizeof (int));
219         ddg=(int*)vcalloc (len, sizeof (int));
220         
221         
222         if ( len>=MAX_LEN_FOR_DP)
223             {       
224             long_trace_flag=1;
225             long_trace=vtmpfile();         
226             }
227         else
228             {
229             dim=(trace==NULL)?0:read_size_int ( trace,sizeof (int*));
230             trace    =realloc_int ( trace,dim,dim,len-dim, len-dim);
231             }
232         
233 /*END OF MEMORY ALLOCATION*/
234         
235         
236                 /*
237                 0(s)   +(dd)
238                   \      |
239                    \     |
240                     \    |
241                      \   |
242                       \  |
243                        \ |
244                         \|
245                 -(e)----O
246                 */ 
247                        
248         
249         pos0=aln2pos_simple ( A,-1, ns, l_s);   
250
251         
252
253         cc[0]=0;
254
255         best_score=0;
256         best_i=0;
257         best_j=0;
258         
259         tr=(long_trace_flag)?buf_trace:trace[0];
260         tr[0]=(TRACE_TYPE)UNDEFINED;
261
262         t=g;
263         for ( j=1; j<=lenal[1]; j++)
264                 {
265                 cc[j]=t=t+h;
266                 dd[j]=t+g;
267                 tr[j]=(TRACE_TYPE)UNDEFINED;
268                 }
269         if (long_trace_flag)fwrite (buf_trace, sizeof ( TRACE_TYPE),lenal[1]+1, long_trace);
270         
271
272         t=g;
273         for (i=1; i<=lenal[0];i++)
274                         {                      
275                         tr=(long_trace_flag)?buf_trace:trace[i];
276                         s=cc[0];
277                         cc[0]=c=t=t+h;
278                         e=t+g;
279                         tr[0]=(TRACE_TYPE)UNDEFINED;
280
281                         for (eg=0,j=1; j<=lenal[1];j++)
282                                 {
283                                 
284                                 sub=(CL->get_dp_cost) (A, pos0, ns[0], l_s[0], i-1, pos0, ns[1], l_s[1],j-1,CL);
285                                 
286                                 /*get the best Insertion*/
287                                 if ( a_better_than_b ( e, c+g, maximise))
288                                         eg++;
289                                 else 
290                                         eg=1;   
291                                 e=best_of_a_b (e, c+g, maximise)+h;
292                                 
293                                 /*Get the best deletion*/
294                                 if ( a_better_than_b ( dd[j], cc[j]+g, maximise))
295                                         ddg[j]++;
296                                 else 
297                                         ddg[j]=1;
298                                 dd[j]=best_of_a_b( dd[j], cc[j]+g, maximise)+h;
299                                 
300                                 /*Chose Substitution for tie breaking*/
301                                 if ( sub!=UNDEFINED)c=best_int(4,maximise,&fop, e, (s+sub), dd[j],0);
302                                 else 
303                                    {
304                                    c=0;
305                                    fop=3;
306                                    dd[j]=e=0;
307                                    eg=ddg[j]=0;
308                                    }
309
310                                 if ( c>best_score)
311                                    {
312                                    best_i=i;
313                                    best_j=j;
314                                    best_score=c;
315                                    }
316                                 fop-=1;
317                                 s=cc[j];
318                                 cc[j]=c;                          
319                                 
320                         
321                                 if ( fop==-1)
322                                         {tr[j]=(TRACE_TYPE)fop*eg;
323                                         }
324                                 else if ( fop==1)
325                                         {tr[j]=(TRACE_TYPE)fop*ddg[j];
326                                         }
327                                 else if (fop==0)
328                                         {tr[j]=(TRACE_TYPE)0;   
329                                         }       
330                                 else if ( fop==2)
331                                         {
332                                         tr[j]=(TRACE_TYPE)UNDEFINED;
333                                         }
334                                 
335                                 fop= -2;
336                                 }
337                         if (long_trace_flag)
338                             {
339                             fwrite ( buf_trace, sizeof (TRACE_TYPE), lenal[1]+1, long_trace);
340                             }
341                         }
342         
343         
344
345
346         if (best_i==0 ||best_j==0 )
347             {
348             vfree (buf_trace);
349             vfree (buffer);
350             free_char ( al,-1);
351             vfree ( char_buf);
352             vfree ( dd);
353             vfree ( cc);                
354             vfree ( ddg);
355             free_int (pos0, -1);
356             A->len_aln=0;
357             aln=A->seq_al;
358             
359             for ( c=0; c< 2; c++)
360                 {
361                 for ( a=0; a< ns[c]; a++) 
362                     {
363                     aln[l_s[c][a]][0]='\0';
364                     }
365                 }
366             if ( long_trace_flag)fclose ( long_trace);
367             
368             return UNDEFINED;
369             }
370         else
371             {
372             i=last_i=best_i;
373             j=last_j=best_j;
374             }
375         ala=alb=0;
376         
377         
378         while (i>0 && j>0)
379                         {
380                         if ( i==0 || j==0)k=UNDEFINED;
381                           /*                            k=-1;
382                         else if ( j==0)
383                                 k=1;
384                         else if ( j==0 && i==0)
385                         k=1;*/  
386                         else
387                                 {
388                                 if (long_trace_flag)
389                                    {
390                                    fseek ( long_trace, sizeof (TRACE_TYPE)*((lenal[1]+1)*(i)+j),SEEK_SET);
391                                    fread ( &k, sizeof (TRACE_TYPE), 1, long_trace);
392                                    }
393                                 else
394                                    {
395                                    
396                                    k=trace[i][j];
397                                    }
398                                 }
399                                 
400                         if ( k==UNDEFINED){i=j=0;}      
401                         if (k==0)
402                                 {
403                                 
404                                 al[0][ala++]=1;
405                                 al[1][alb++]=1;
406                                    
407                         
408  
409                                 i--;
410                                 j--;
411                                 last_i=i;
412                                 last_j=j;
413                                 
414                                 }
415                         else if (k==(TRACE_TYPE)UNDEFINED)
416                                 {
417                                 i=0;
418                                 j=0;
419                                 
420                                 }
421                         else if (k>0)
422                                 {
423                                 
424                                 for ( a=0; a< k; a++)
425                                         {
426                                         al[0][ala++]=1;
427                                         al[1][alb++]=0;
428                                         i--;
429                                         }
430                                 last_i=i;
431                                 last_j=j;
432                                 }
433                         else if (k<0)
434                                 {
435                                 
436                                 for ( a=0; a>k; a--)
437                                         {
438                                         al[0][ala++]=0;
439                                         al[1][alb++]=1;
440                                         j--;
441                                         }
442                                 last_i=i;
443                                 last_j=j;
444                                 }
445                         }
446       
447         LEN=ala;        
448         c=LEN-1;  
449         
450         
451
452         invert_list_char ( al[0], LEN);
453         invert_list_char ( al[1], LEN);
454         
455         if ( A->declared_len<=LEN)realloc_alignment  ( A, 2*LEN);       
456
457         
458         aln=A->seq_al;
459         
460         for ( c=0; c<2; c++)
461             for ( a=0; a<ns[c]; a++)
462                 {
463                   A->order[l_s[c][a]][2]=(c==0)?last_i:last_j;
464                   A->order[l_s[c][a]][3]=(c==1)?best_i:best_j;
465                   
466                   e=(c==0)?last_i:last_j;
467                   for ( d=0; d<e; d++)
468                     {
469                       A->order[l_s[c][a]][1]+=1-is_gap(aln[l_s[c][a]][d]);
470                     }
471                 }
472         
473                     
474         for ( c=0; c< 2; c++)
475             {
476             for ( a=0; a< ns[c]; a++) 
477                 {
478                 aln[l_s[c][a]]+=(c==0)?last_i:last_j;
479                 ch=0;
480                 for ( b=0; b< LEN; b++)
481                     {
482                    
483                     if (al[c][b]==1)
484                         char_buf[b]=aln[l_s[c][a]][ch++];
485                     else
486                         char_buf[b]='-';
487                    }
488                 char_buf[b]='\0';
489                 aln[l_s[c][a]]-=(c==0)?last_i:last_j;
490                 sprintf (aln[l_s[c][a]],"%s", char_buf);
491                 }
492              }
493         
494         
495         A->len_aln=LEN;
496         
497         free_int (pos0, -1);
498         vfree ( cc);
499         vfree (dd);             
500         vfree (ddg);
501         vfree (buffer);
502         vfree (char_buf); 
503         vfree (buf_trace);
504         if ( long_trace_flag)fclose (long_trace);       
505         
506                 
507         return best_score;
508         }
509
510
511 /*******************************************************************************/
512 /*                AUTOMATIC GEP+SCALE PENALTY FOR SW                           */
513 /*                                                                             */
514 /*                                                                             */
515 /*                                                                             */
516 /*                                                                             */
517 /*******************************************************************************/
518
519 Alignment * add_seq2aln   (Constraint_list *CL, Alignment *IN,Sequence  *S)
520            {    
521            int *n_groups;
522            int **group_list;
523            int a;
524            static int series=0;
525           
526           
527           
528           
529            int ste; /*sequence to extract, last one if they are packed*/
530
531
532
533
534
535            if (CL->packed_seq_lu){ste=S->nseq-1;}
536            else{ste=0;}
537
538            if ( IN==NULL)
539               {
540               IN=realloc_aln2(IN, 1, strlen (S->seq[ste])+1);
541               IN->S=S;
542               IN->nseq=1;
543               
544               
545               
546               sprintf ( IN->seq_al[0], "%s", S->seq[ste]);
547               sprintf (IN->name[0], "%s_%d_1", S->name[ste],series);
548               IN->order[0][0]=ste;
549               IN->order[0][1]=0;
550               
551               IN->len_aln=strlen ( IN->seq_al[0]);
552               series++;
553             
554               }    
555            else
556               {
557               
558               IN=realloc_aln2 ( IN, IN->nseq+1,MAX(strlen ( S->seq[ste])+1, IN->len_aln+1));
559               n_groups=(int*)vcalloc ( 2, sizeof (int));
560               group_list=declare_int (2,IN->nseq+1);
561               
562               n_groups[0]=IN->nseq;
563               for ( a=0; a<IN->nseq; a++)group_list[0][a]=a;
564               
565               n_groups[1]=1;
566               group_list[1][0]=IN->nseq;
567               sprintf (IN->name[IN->nseq], "%s_%d_%d",S->name[ste],series,IN->nseq+1);
568               sprintf (IN->seq_al[IN->nseq], "%s",S->seq[ste]);
569               IN->order[IN->nseq][0]=ste;
570               IN->order[IN->nseq][1]=0;
571               IN->nseq++;
572               
573              
574               pair_wise ( IN, n_groups, group_list,CL); 
575               
576               }
577           
578            return IN;
579               
580            }
581            
582