Mac binaries
[jabaws.git] / website / archive / binaries / mac / src / tcoffee / t_coffee_source / util_dp_gotoh_sw.c
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <math.h>
4 #include <stdarg.h>
5 #include <string.h>
6
7 #include "io_lib_header.h"
8 #include "util_lib_header.h"
9 #include "define_header.h"
10
11 #include "dp_lib_header.h"
12
13 int gotoh_pair_wise_lalign ( Alignment *A, int*ns, int **l_s,Constraint_list *CL)
14     {
15     Alignment *BUF=NULL;
16     Alignment *EA=NULL;
17     
18     int a;
19     BUF=copy_aln (A, BUF);
20     
21     
22     for ( a=0; a<CL->lalign_n_top; a++)
23       {
24         free_aln (A);
25
26         A=copy_aln (BUF, A);
27         
28         A->score_aln=gotoh_pair_wise_sw (A, ns, l_s, CL);
29         EA=fast_coffee_evaluate_output (A, CL);
30
31         output_format_aln (CL->out_aln_format[0],A,EA,"stdout");
32         CL=undefine_sw_aln ( A, CL);
33       }
34     exit (1);
35     return 0;
36     }
37 Constraint_list * undefine_sw_aln ( Alignment *A, Constraint_list *CL)
38   {
39     int a, b, l;
40     int **pos;
41     int  r1, rs1;
42     int  r2, rs2;
43     
44
45
46     pos=aln2pos_simple ( A,A->nseq);
47     
48     for ( l=0; l< A->len_aln; l++)
49       for ( a=0; a< A->nseq-1; a++)
50         {
51           rs1=A->order[a][0];
52           r1 =pos[a][l];
53                 
54           if ( r1<=0)continue;
55           for ( b=a+1; b< A->nseq;b++)
56                   {
57                     rs2=A->order[b][0];
58                     r2 =pos[b][l];
59                     if ( r2<=0)continue;
60                     
61                     CL=undefine_sw_pair ( CL, rs1, r1, rs2, r2);
62                   }
63         }
64     free_int (pos, -1);
65     return CL;
66   }
67 Constraint_list * undefine_sw_pair ( Constraint_list *CL, int s1, int r1, int s2, int r2)
68   {
69     int a, b;
70     
71     if ( !CL->forbiden_pair_list)
72       {
73         
74         CL->forbiden_pair_list=vcalloc ( (CL->S)->nseq, sizeof (int ***));
75         for ( a=0; a< ((CL->S)->nseq); a++)
76           {
77             CL->forbiden_pair_list[a]=vcalloc ( (CL->S)->nseq, sizeof (int **));
78             for ( b=0; b< ((CL->S)->nseq); b++)
79               CL->forbiden_pair_list[a][b]=vcalloc ( (CL->S)->len[a]+1, sizeof (int *));
80             
81           }
82       }
83     if ( CL->forbiden_pair_list[s1][s2][r1]==NULL)CL->forbiden_pair_list[s1][s2][r1]=vcalloc ( (CL->S)->len[s2]+1, sizeof (int));
84     CL->forbiden_pair_list[s1][s2][r1][r2]=1;
85     
86     if ( CL->forbiden_pair_list[s2][s1][r2]==NULL)CL->forbiden_pair_list[s2][s1][r2]=vcalloc ( (CL->S)->len[s1]+1, sizeof (int));
87     CL->forbiden_pair_list[s2][s1][r2][r1]=1;
88     
89     return CL;
90   }
91        
92 int sw_pair_is_defined ( Constraint_list *CL, int s1, int r1, int s2, int r2)
93         {
94           int d;
95           
96           d=(r1-r2);
97           d=(d<0)?-d:d;
98           
99
100           if ( s1==s2 && d<(CL->sw_min_dist)) return UNDEFINED;
101           else if ( ! CL->forbiden_pair_list) return 1;
102           else if ( CL->forbiden_pair_list[s1][s2][r1]==NULL)return 1;
103           else if ( CL->forbiden_pair_list[s1][s2][r1][r2]==1)return UNDEFINED;
104           else if ( CL->forbiden_pair_list[s1][s2][r1][r2]==0)return 1;
105           
106           else 
107             {
108               crash ("ERROR in function: sw_pair_is_defined\n");
109               return UNDEFINED;
110             }
111             
112         }       
113
114   
115 int gotoh_pair_wise_sw (Alignment *A, int*ns, int **l_s,Constraint_list *CL)
116         {
117 /*******************************************************************************/
118 /*                SMITH AND WATERMAN                                           */
119 /*                                                                             */
120 /*      makes DP between the the ns[0] sequences and the ns[1]                 */
121 /*                                                                             */
122 /*      for MODE, see the function  get_dp_cost                                */
123 /*******************************************************************************/
124         int a, b, d, i, j;
125         int last_i, last_j;
126         int t;
127         int *cc;
128         int *dd, *ddg;
129         int lenal[2], len;
130         
131         int c,s, e,eg, ch,g,h, maximise;
132         int sub;
133         
134         int fop;
135         static int **pos0;
136         
137         char **al=NULL;
138         char **aln=NULL;
139
140         
141         int ala, alb,LEN;
142         char *buffer;
143         char *char_buf;
144         
145 /*trace back variables       */
146         int best_i;
147         int best_j;
148         int best_score;
149         
150         
151         FILE       *long_trace=NULL;
152         TRACE_TYPE *buf_trace=NULL;
153         static TRACE_TYPE **trace;
154         TRACE_TYPE k;
155         TRACE_TYPE *tr;
156         int long_trace_flag=0;
157         int dim;
158 /********Prepare penalties*******/
159         if (CL->moca)
160           {
161             g=(CL->gop+(CL->moca)->moca_scale)*SCORE_K;
162             h=(CL->gep+(CL->moca)->moca_scale)*SCORE_K;
163           }
164         else
165           {
166             g=(CL->gop-CL->nomatch)*SCORE_K;
167             h=(CL->gep-CL->nomatch)*SCORE_K;
168           }
169         fprintf ( stderr, "\n%d %d", g, h);
170         maximise=CL->maximise;
171 /********************************/      
172 /*CLEAN UP AFTER USE*/
173         if ( A==NULL)
174            {
175            free_int (trace,-1);
176            trace=NULL;
177            if ( al)free_char (al,-1);
178            al=NULL;
179            return 0;
180            }       
181 /*DO MEMORY ALLOCATION FOR SW DP*/
182         
183         lenal[0]=strlen (A->seq_al[l_s[0][0]]);
184         lenal[1]=strlen (A->seq_al[l_s[1][0]]);
185         len= (( lenal[0]>lenal[1])?lenal[0]:lenal[1])+1;
186         buf_trace=vcalloc ( len, sizeof (TRACE_TYPE));  
187         buffer=vcalloc ( 2*len, sizeof (char)); 
188         al=declare_char (2, 2*len);  
189         
190         char_buf= vcalloc (2*len, sizeof (char));       
191         dd= vcalloc (len, sizeof (int));
192         cc= vcalloc (len, sizeof (int));
193         ddg=vcalloc (len, sizeof (int));
194         
195         
196         if ( len>=MAX_LEN_FOR_DP)
197             {       
198             long_trace_flag=1;
199             long_trace=vtmpfile();         
200             }
201         else
202             {
203             dim=(trace==NULL)?0:read_size_int ( trace,sizeof (int*));
204             trace    =realloc_int ( trace,dim,dim,len-dim, len-dim);
205             }
206         
207 /*END OF MEMORY ALLOCATION*/
208         
209         
210                 /*
211                 0(s)   +(dd)
212                   \      |
213                    \     |
214                     \    |
215                      \   |
216                       \  |
217                        \ |
218                         \|
219                 -(e)----O
220                 */ 
221                        
222         
223         pos0=aln2pos_simple ( A,-1, ns, l_s);   
224
225         
226
227         cc[0]=0;
228
229         best_score=0;
230         best_i=0;
231         best_j=0;
232         
233         tr=(long_trace_flag)?buf_trace:trace[0];
234         tr[0]=(TRACE_TYPE)UNDEFINED;
235
236         t=g;
237         for ( j=1; j<=lenal[1]; j++)
238                 {
239                 cc[j]=t=t+h;
240                 dd[j]=t+g;
241                 tr[j]=(TRACE_TYPE)UNDEFINED;
242                 }
243         if (long_trace_flag)fwrite (buf_trace, sizeof ( TRACE_TYPE),lenal[1]+1, long_trace);
244         
245
246         t=g;
247         for (i=1; i<=lenal[0];i++)
248                         {                      
249                         tr=(long_trace_flag)?buf_trace:trace[i];
250                         s=cc[0];
251                         cc[0]=c=t=t+h;
252                         e=t+g;
253                         tr[0]=(TRACE_TYPE)UNDEFINED;
254
255                         for (eg=0,j=1; j<=lenal[1];j++)
256                                 {
257                                 
258                                 sub=(CL->get_dp_cost) (A, pos0, ns[0], l_s[0], i-1, pos0, ns[1], l_s[1],j-1,CL);
259                                 
260                                 /*get the best Insertion*/
261                                 if ( a_better_than_b ( e, c+g, maximise))
262                                         eg++;
263                                 else 
264                                         eg=1;   
265                                 e=best_of_a_b (e, c+g, maximise)+h;
266                                 
267                                 /*Get the best deletion*/
268                                 if ( a_better_than_b ( dd[j], cc[j]+g, maximise))
269                                         ddg[j]++;
270                                 else 
271                                         ddg[j]=1;
272                                 dd[j]=best_of_a_b( dd[j], cc[j]+g, maximise)+h;
273                                 
274                                 /*Chose Substitution for tie breaking*/
275                                 if ( sub!=UNDEFINED)c=best_int(4,maximise,&fop, e, (s+sub), dd[j],0);
276                                 else 
277                                    {
278                                    c=0;
279                                    fop=3;
280                                    dd[j]=e=0;
281                                    eg=ddg[j]=0;
282                                    }
283
284                                 if ( c>best_score)
285                                    {
286                                    best_i=i;
287                                    best_j=j;
288                                    best_score=c;
289                                    }
290                                 fop-=1;
291                                 s=cc[j];
292                                 cc[j]=c;                          
293                                 
294                         
295                                 if ( fop==-1)
296                                         {tr[j]=(TRACE_TYPE)fop*eg;
297                                         }
298                                 else if ( fop==1)
299                                         {tr[j]=(TRACE_TYPE)fop*ddg[j];
300                                         }
301                                 else if (fop==0)
302                                         {tr[j]=(TRACE_TYPE)0;   
303                                         }       
304                                 else if ( fop==2)
305                                         {
306                                         tr[j]=(TRACE_TYPE)UNDEFINED;
307                                         }
308                                 
309                                 fop= -2;
310                                 }
311                         if (long_trace_flag)
312                             {
313                             fwrite ( buf_trace, sizeof (TRACE_TYPE), lenal[1]+1, long_trace);
314                             }
315                         }
316         
317         
318
319
320         if (best_i==0 ||best_j==0 )
321             {
322             vfree (buf_trace);
323             vfree (buffer);
324             free_char ( al,-1);
325             vfree ( char_buf);
326             vfree ( dd);
327             vfree ( cc);                
328             vfree ( ddg);
329             free_int (pos0, -1);
330             A->len_aln=0;
331             aln=A->seq_al;
332             
333             for ( c=0; c< 2; c++)
334                 {
335                 for ( a=0; a< ns[c]; a++) 
336                     {
337                     aln[l_s[c][a]][0]='\0';
338                     }
339                 }
340             if ( long_trace_flag)fclose ( long_trace);
341             
342             return UNDEFINED;
343             }
344         else
345             {
346             i=last_i=best_i;
347             j=last_j=best_j;
348             }
349         ala=alb=0;
350         
351         
352         while (i>0 && j>0)
353                         {
354                         if ( i==0 || j==0)k=UNDEFINED;
355                           /*                            k=-1;
356                         else if ( j==0)
357                                 k=1;
358                         else if ( j==0 && i==0)
359                         k=1;*/  
360                         else
361                                 {
362                                 if (long_trace_flag)
363                                    {
364                                    fseek ( long_trace, sizeof (TRACE_TYPE)*((lenal[1]+1)*(i)+j),SEEK_SET);
365                                    fread ( &k, sizeof (TRACE_TYPE), 1, long_trace);
366                                    }
367                                 else
368                                    {
369                                    
370                                    k=trace[i][j];
371                                    }
372                                 }
373                                 
374                         if ( k==UNDEFINED){i=j=0;}      
375                         if (k==0)
376                                 {
377                                 
378                                 al[0][ala++]=1;
379                                 al[1][alb++]=1;
380                                    
381                         
382  
383                                 i--;
384                                 j--;
385                                 last_i=i;
386                                 last_j=j;
387                                 
388                                 }
389                         else if (k==(TRACE_TYPE)UNDEFINED)
390                                 {
391                                 i=0;
392                                 j=0;
393                                 
394                                 }
395                         else if (k>0)
396                                 {
397                                 
398                                 for ( a=0; a< k; a++)
399                                         {
400                                         al[0][ala++]=1;
401                                         al[1][alb++]=0;
402                                         i--;
403                                         }
404                                 last_i=i;
405                                 last_j=j;
406                                 }
407                         else if (k<0)
408                                 {
409                                 
410                                 for ( a=0; a>k; a--)
411                                         {
412                                         al[0][ala++]=0;
413                                         al[1][alb++]=1;
414                                         j--;
415                                         }
416                                 last_i=i;
417                                 last_j=j;
418                                 }
419                         }
420       
421         LEN=ala;        
422         c=LEN-1;  
423         
424         
425
426         invert_list_char ( al[0], LEN);
427         invert_list_char ( al[1], LEN);
428         
429         if ( A->declared_len<=LEN)realloc_alignment  ( A, 2*LEN);       
430
431         
432         aln=A->seq_al;
433         
434         for ( c=0; c<2; c++)
435             for ( a=0; a<ns[c]; a++)
436                 {
437                   A->order[l_s[c][a]][2]=(c==0)?last_i:last_j;
438                   A->order[l_s[c][a]][3]=(c==1)?best_i:best_j;
439                   
440                   e=(c==0)?last_i:last_j;
441                   for ( d=0; d<e; d++)
442                     {
443                       A->order[l_s[c][a]][1]+=1-is_gap(aln[l_s[c][a]][d]);
444                     }
445                 }
446         
447                     
448         for ( c=0; c< 2; c++)
449             {
450             for ( a=0; a< ns[c]; a++) 
451                 {
452                 aln[l_s[c][a]]+=(c==0)?last_i:last_j;
453                 ch=0;
454                 for ( b=0; b< LEN; b++)
455                     {
456                    
457                     if (al[c][b]==1)
458                         char_buf[b]=aln[l_s[c][a]][ch++];
459                     else
460                         char_buf[b]='-';
461                    }
462                 char_buf[b]='\0';
463                 aln[l_s[c][a]]-=(c==0)?last_i:last_j;
464                 sprintf (aln[l_s[c][a]],"%s", char_buf);
465                 }
466              }
467         
468         
469         A->len_aln=LEN;
470         
471         free_int (pos0, -1);
472         vfree ( cc);
473         vfree (dd);             
474         vfree (ddg);
475         vfree (buffer);
476         vfree (char_buf); 
477         vfree (buf_trace);
478         if ( long_trace_flag)fclose (long_trace);       
479         
480                 
481         return best_score;
482         }
483
484
485 /*******************************************************************************/
486 /*                AUTOMATIC GEP+SCALE PENALTY FOR SW                           */
487 /*                                                                             */
488 /*                                                                             */
489 /*                                                                             */
490 /*                                                                             */
491 /*******************************************************************************/
492
493 Alignment * add_seq2aln   (Constraint_list *CL, Alignment *IN,Sequence  *S)
494            {    
495            int *n_groups;
496            int **group_list;
497            int a;
498            static int series=0;
499           
500           
501           
502           
503            int ste; /*sequence to extract, last one if they are packed*/
504
505
506
507
508
509            if (CL->packed_seq_lu){ste=S->nseq-1;}
510            else{ste=0;}
511
512            if ( IN==NULL)
513               {
514               IN=realloc_aln2(IN, 1, strlen (S->seq[ste])+1);
515               IN->S=S;
516               IN->nseq=1;
517               
518               
519               
520               sprintf ( IN->seq_al[0], "%s", S->seq[ste]);
521               sprintf (IN->name[0], "%s_%d_1", S->name[ste],series);
522               IN->order[0][0]=ste;
523               IN->order[0][1]=0;
524               
525               IN->len_aln=strlen ( IN->seq_al[0]);
526               series++;
527             
528               }    
529            else
530               {
531               
532               IN=realloc_aln2 ( IN, IN->nseq+1,MAX(strlen ( S->seq[ste])+1, IN->len_aln+1));
533               n_groups=vcalloc ( 2, sizeof (int));
534               group_list=declare_int (2,IN->nseq+1);
535               
536               n_groups[0]=IN->nseq;
537               for ( a=0; a<IN->nseq; a++)group_list[0][a]=a;
538               
539               n_groups[1]=1;
540               group_list[1][0]=IN->nseq;
541               sprintf (IN->name[IN->nseq], "%s_%d_%d",S->name[ste],series,IN->nseq+1);
542               sprintf (IN->seq_al[IN->nseq], "%s",S->seq[ste]);
543               IN->order[IN->nseq][0]=ste;
544               IN->order[IN->nseq][1]=0;
545               IN->nseq++;
546               
547              
548               pair_wise ( IN, n_groups, group_list,CL); 
549               
550               }
551           
552            return IN;
553               
554            }
555            
556
557 /******************************COPYRIGHT NOTICE*******************************/
558 /*© Centro de Regulacio Genomica */
559 /*and */
560 /*Cedric Notredame */
561 /*Fri Feb 18 08:27:45 CET 2011 - Revision 596. */
562 /*All rights reserved.*/
563 /*This file is part of T-COFFEE.*/
564 /**/
565 /*    T-COFFEE is free software; you can redistribute it and/or modify*/
566 /*    it under the terms of the GNU General Public License as published by*/
567 /*    the Free Software Foundation; either version 2 of the License, or*/
568 /*    (at your option) any later version.*/
569 /**/
570 /*    T-COFFEE is distributed in the hope that it will be useful,*/
571 /*    but WITHOUT ANY WARRANTY; without even the implied warranty of*/
572 /*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the*/
573 /*    GNU General Public License for more details.*/
574 /**/
575 /*    You should have received a copy of the GNU General Public License*/
576 /*    along with Foobar; if not, write to the Free Software*/
577 /*    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA*/
578 /*...............................................                                                                                      |*/
579 /*  If you need some more information*/
580 /*  cedric.notredame@europe.com*/
581 /*...............................................                                                                                                                                     |*/
582 /**/
583 /**/
584 /*      */
585 /******************************COPYRIGHT NOTICE*******************************/