JWS-112 Bumping version of T-Coffee to version 11.00.8cbe486.
[jabaws.git] / binaries / src / tcoffee / t_coffee_source / dp_lib / util_dp_gotoh_sw.c
diff --git a/binaries/src/tcoffee/t_coffee_source/dp_lib/util_dp_gotoh_sw.c b/binaries/src/tcoffee/t_coffee_source/dp_lib/util_dp_gotoh_sw.c
new file mode 100644 (file)
index 0000000..14c0d55
--- /dev/null
@@ -0,0 +1,582 @@
+/******************************COPYRIGHT NOTICE*******************************/
+/*  (c) Centro de Regulacio Genomica                                                        */
+/*  and                                                                                     */
+/*  Cedric Notredame                                                                        */
+/*  12 Aug 2014 - 22:07.                                                                    */
+/*All rights reserved.                                                                      */
+/*This file is part of T-COFFEE.                                                            */
+/*                                                                                          */
+/*    T-COFFEE is free software; you can redistribute it and/or modify                      */
+/*    it under the terms of the GNU General Public License as published by                  */
+/*    the Free Software Foundation; either version 2 of the License, or                     */
+/*    (at your option) any later version.                                                   */
+/*                                                                                          */
+/*    T-COFFEE is distributed in the hope that it will be useful,                           */
+/*    but WITHOUT ANY WARRANTY; without even the implied warranty of                        */
+/*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the                         */
+/*    GNU General Public License for more details.                                          */
+/*                                                                                          */
+/*    You should have received a copy of the GNU General Public License                     */
+/*    along with Foobar; if not, write to the Free Software                                 */
+/*    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA             */
+/*...............................................                                           */
+/*  If you need some more information                                                       */
+/*  cedric.notredame@europe.com                                                             */
+/*...............................................                                           */
+/******************************COPYRIGHT NOTICE*******************************/
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <stdarg.h>
+#include <string.h>
+
+#include "io_lib_header.h"
+#include "util_lib_header.h"
+#include "define_header.h"
+
+#include "dp_lib_header.h"
+
+int gotoh_pair_wise_lalign ( Alignment *A, int*ns, int **l_s,Constraint_list *CL)
+    {
+    Alignment *BUF=NULL;
+    Alignment *EA=NULL;
+    
+    int a;
+    BUF=copy_aln (A, BUF);
+    
+    
+    for ( a=0; a<CL->lalign_n_top; a++)
+      {
+       free_aln (A);
+
+       A=copy_aln (BUF, A);
+       
+       A->score_aln=gotoh_pair_wise_sw (A, ns, l_s, CL);
+       EA=fast_coffee_evaluate_output (A, CL);
+
+       output_format_aln (CL->out_aln_format[0],A,EA,"stdout");
+       CL=undefine_sw_aln ( A, CL);
+      }
+    exit (1);
+    return 0;
+    }
+Constraint_list * undefine_sw_aln ( Alignment *A, Constraint_list *CL)
+  {
+    int a, b, l;
+    int **pos;
+    int  r1, rs1;
+    int  r2, rs2;
+    
+
+
+    pos=aln2pos_simple ( A,A->nseq);
+    
+    for ( l=0; l< A->len_aln; l++)
+      for ( a=0; a< A->nseq-1; a++)
+       {
+         rs1=A->order[a][0];
+         r1 =pos[a][l];
+               
+         if ( r1<=0)continue;
+         for ( b=a+1; b< A->nseq;b++)
+                 {
+                   rs2=A->order[b][0];
+                   r2 =pos[b][l];
+                   if ( r2<=0)continue;
+                   
+                   CL=undefine_sw_pair ( CL, rs1, r1, rs2, r2);
+                 }
+       }
+    free_int (pos, -1);
+    return CL;
+  }
+Constraint_list * undefine_sw_pair ( Constraint_list *CL, int s1, int r1, int s2, int r2)
+  {
+    int a, b;
+    
+    if ( !CL->forbiden_pair_list)
+      {
+       
+       CL->forbiden_pair_list=(int ****)vcalloc ( (CL->S)->nseq, sizeof (int ***));
+       for ( a=0; a< ((CL->S)->nseq); a++)
+         {
+           CL->forbiden_pair_list[a]=(int ***)vcalloc ( (CL->S)->nseq, sizeof (int **));
+           for ( b=0; b< ((CL->S)->nseq); b++)
+             CL->forbiden_pair_list[a][b]=(int **)vcalloc ( (CL->S)->len[a]+1, sizeof (int *));
+           
+         }
+      }
+    if ( CL->forbiden_pair_list[s1][s2][r1]==NULL)CL->forbiden_pair_list[s1][s2][r1]=(int *)vcalloc ( (CL->S)->len[s2]+1, sizeof (int));
+    CL->forbiden_pair_list[s1][s2][r1][r2]=1;
+    
+    if ( CL->forbiden_pair_list[s2][s1][r2]==NULL)CL->forbiden_pair_list[s2][s1][r2]=(int *)vcalloc ( (CL->S)->len[s1]+1, sizeof (int));
+    CL->forbiden_pair_list[s2][s1][r2][r1]=1;
+    
+    return CL;
+  }
+       
+int sw_pair_is_defined ( Constraint_list *CL, int s1, int r1, int s2, int r2)
+        {
+         int d;
+         
+         d=(r1-r2);
+         d=(d<0)?-d:d;
+         
+
+         if ( s1==s2 && d<(CL->sw_min_dist)) return UNDEFINED;
+         else if ( ! CL->forbiden_pair_list) return 1;
+         else if ( CL->forbiden_pair_list[s1][s2][r1]==NULL)return 1;
+         else if ( CL->forbiden_pair_list[s1][s2][r1][r2]==1)return UNDEFINED;
+         else if ( CL->forbiden_pair_list[s1][s2][r1][r2]==0)return 1;
+         
+         else 
+           {
+             crash ("ERROR in function: sw_pair_is_defined\n");
+             return UNDEFINED;
+           }
+           
+       }       
+
+  
+int gotoh_pair_wise_sw (Alignment *A, int*ns, int **l_s,Constraint_list *CL)
+       {
+/*******************************************************************************/
+/*                SMITH AND WATERMAN                                           */
+/*                                                                             */
+/*     makes DP between the the ns[0] sequences and the ns[1]                 */
+/*                                                                             */
+/*     for MODE, see the function  get_dp_cost                                */
+/*******************************************************************************/
+       int a, b, d, i, j;
+       int last_i, last_j;
+       int t;
+       int *cc;
+       int *dd, *ddg;
+       int lenal[2], len;
+       
+       int c,s, e,eg, ch,g,h, maximise;
+       int sub;
+       
+       int fop;
+       static int **pos0;
+       
+       char **al=NULL;
+       char **aln=NULL;
+
+       
+       int ala, alb,LEN;
+       char *buffer;
+       char *char_buf;
+       
+/*trace back variables       */
+       int best_i;
+       int best_j;
+       int best_score;
+       
+       
+       FILE       *long_trace=NULL;
+       TRACE_TYPE *buf_trace=NULL;
+       static TRACE_TYPE **trace;
+       TRACE_TYPE k;
+       TRACE_TYPE *tr;
+       int long_trace_flag=0;
+       int dim;
+/********Prepare penalties*******/
+       if (CL->moca)
+         {
+           g=(CL->gop+(CL->moca)->moca_scale)*SCORE_K;
+           h=(CL->gep+(CL->moca)->moca_scale)*SCORE_K;
+         }
+       else
+         {
+           g=(CL->gop-CL->nomatch)*SCORE_K;
+           h=(CL->gep-CL->nomatch)*SCORE_K;
+         }
+       fprintf ( stderr, "\n%d %d", g, h);
+       maximise=CL->maximise;
+/********************************/     
+/*CLEAN UP AFTER USE*/
+       if ( A==NULL)
+          {
+          free_int (trace,-1);
+          trace=NULL;
+          if ( al)free_char (al,-1);
+          al=NULL;
+          return 0;
+          }       
+/*DO MEMORY ALLOCATION FOR SW DP*/
+       
+       lenal[0]=strlen (A->seq_al[l_s[0][0]]);
+       lenal[1]=strlen (A->seq_al[l_s[1][0]]);
+       len= (( lenal[0]>lenal[1])?lenal[0]:lenal[1])+1;
+       buf_trace=(int*)vcalloc ( len, sizeof (TRACE_TYPE));    
+       buffer=(char*)vcalloc ( 2*len, sizeof (char));  
+       al=declare_char (2, 2*len);  
+       
+       char_buf=(char*) vcalloc (2*len, sizeof (char));        
+       dd= (int*) vcalloc (len, sizeof (int));
+       cc=(int*) vcalloc (len, sizeof (int));
+       ddg=(int*)vcalloc (len, sizeof (int));
+       
+       
+       if ( len>=MAX_LEN_FOR_DP)
+           {       
+           long_trace_flag=1;
+           long_trace=vtmpfile();         
+           }
+       else
+           {
+           dim=(trace==NULL)?0:read_size_int ( trace,sizeof (int*));
+           trace    =realloc_int ( trace,dim,dim,len-dim, len-dim);
+           }
+       
+/*END OF MEMORY ALLOCATION*/
+       
+       
+               /*
+               0(s)   +(dd)
+                 \      |
+                  \     |
+                   \    |
+                    \   |
+                     \  |
+                      \ |
+                       \|
+               -(e)----O
+               */ 
+                      
+       
+       pos0=aln2pos_simple ( A,-1, ns, l_s);   
+
+       
+
+       cc[0]=0;
+
+       best_score=0;
+       best_i=0;
+       best_j=0;
+       
+       tr=(long_trace_flag)?buf_trace:trace[0];
+       tr[0]=(TRACE_TYPE)UNDEFINED;
+
+       t=g;
+       for ( j=1; j<=lenal[1]; j++)
+               {
+               cc[j]=t=t+h;
+               dd[j]=t+g;
+               tr[j]=(TRACE_TYPE)UNDEFINED;
+               }
+       if (long_trace_flag)fwrite (buf_trace, sizeof ( TRACE_TYPE),lenal[1]+1, long_trace);
+       
+
+       t=g;
+       for (i=1; i<=lenal[0];i++)
+                       {                      
+                       tr=(long_trace_flag)?buf_trace:trace[i];
+                       s=cc[0];
+                       cc[0]=c=t=t+h;
+                       e=t+g;
+                       tr[0]=(TRACE_TYPE)UNDEFINED;
+
+                       for (eg=0,j=1; j<=lenal[1];j++)
+                               {
+                               
+                               sub=(CL->get_dp_cost) (A, pos0, ns[0], l_s[0], i-1, pos0, ns[1], l_s[1],j-1,CL);
+                               
+                               /*get the best Insertion*/
+                               if ( a_better_than_b ( e, c+g, maximise))
+                                       eg++;
+                               else 
+                                       eg=1;   
+                               e=best_of_a_b (e, c+g, maximise)+h;
+                               
+                               /*Get the best deletion*/
+                               if ( a_better_than_b ( dd[j], cc[j]+g, maximise))
+                                       ddg[j]++;
+                               else 
+                                       ddg[j]=1;
+                               dd[j]=best_of_a_b( dd[j], cc[j]+g, maximise)+h;
+                               
+                               /*Chose Substitution for tie breaking*/
+                               if ( sub!=UNDEFINED)c=best_int(4,maximise,&fop, e, (s+sub), dd[j],0);
+                               else 
+                                  {
+                                  c=0;
+                                  fop=3;
+                                  dd[j]=e=0;
+                                  eg=ddg[j]=0;
+                                  }
+
+                               if ( c>best_score)
+                                  {
+                                  best_i=i;
+                                  best_j=j;
+                                  best_score=c;
+                                  }
+                               fop-=1;
+                               s=cc[j];
+                               cc[j]=c;                          
+                               
+                       
+                               if ( fop==-1)
+                                       {tr[j]=(TRACE_TYPE)fop*eg;
+                                       }
+                               else if ( fop==1)
+                                       {tr[j]=(TRACE_TYPE)fop*ddg[j];
+                                       }
+                               else if (fop==0)
+                                       {tr[j]=(TRACE_TYPE)0;   
+                                       }       
+                               else if ( fop==2)
+                                       {
+                                       tr[j]=(TRACE_TYPE)UNDEFINED;
+                                       }
+                               
+                               fop= -2;
+                               }
+                       if (long_trace_flag)
+                           {
+                           fwrite ( buf_trace, sizeof (TRACE_TYPE), lenal[1]+1, long_trace);
+                           }
+                       }
+       
+       
+
+
+       if (best_i==0 ||best_j==0 )
+           {
+           vfree (buf_trace);
+           vfree (buffer);
+           free_char ( al,-1);
+           vfree ( char_buf);
+           vfree ( dd);
+           vfree ( cc);                
+           vfree ( ddg);
+           free_int (pos0, -1);
+           A->len_aln=0;
+           aln=A->seq_al;
+           
+           for ( c=0; c< 2; c++)
+               {
+               for ( a=0; a< ns[c]; a++) 
+                   {
+                   aln[l_s[c][a]][0]='\0';
+                   }
+               }
+           if ( long_trace_flag)fclose ( long_trace);
+           
+           return UNDEFINED;
+           }
+       else
+           {
+           i=last_i=best_i;
+           j=last_j=best_j;
+           }
+       ala=alb=0;
+       
+       
+       while (i>0 && j>0)
+                       {
+                       if ( i==0 || j==0)k=UNDEFINED;
+                         /*                            k=-1;
+                       else if ( j==0)
+                               k=1;
+                       else if ( j==0 && i==0)
+                       k=1;*/  
+                       else
+                               {
+                               if (long_trace_flag)
+                                  {
+                                  fseek ( long_trace, sizeof (TRACE_TYPE)*((lenal[1]+1)*(i)+j),SEEK_SET);
+                                  fread ( &k, sizeof (TRACE_TYPE), 1, long_trace);
+                                  }
+                               else
+                                  {
+                                  
+                                  k=trace[i][j];
+                                  }
+                               }
+                               
+                       if ( k==UNDEFINED){i=j=0;}      
+                       if (k==0)
+                               {
+                               
+                               al[0][ala++]=1;
+                               al[1][alb++]=1;
+                                  
+                       
+                               i--;
+                               j--;
+                               last_i=i;
+                               last_j=j;
+                               
+                               }
+                       else if (k==(TRACE_TYPE)UNDEFINED)
+                               {
+                               i=0;
+                               j=0;
+                               
+                               }
+                       else if (k>0)
+                               {
+                               
+                               for ( a=0; a< k; a++)
+                                       {
+                                       al[0][ala++]=1;
+                                       al[1][alb++]=0;
+                                       i--;
+                                       }
+                               last_i=i;
+                               last_j=j;
+                               }
+                       else if (k<0)
+                               {
+                               
+                               for ( a=0; a>k; a--)
+                                       {
+                                       al[0][ala++]=0;
+                                       al[1][alb++]=1;
+                                       j--;
+                                       }
+                               last_i=i;
+                               last_j=j;
+                               }
+                       }
+      
+       LEN=ala;        
+       c=LEN-1;  
+       
+       
+
+       invert_list_char ( al[0], LEN);
+       invert_list_char ( al[1], LEN);
+       
+       if ( A->declared_len<=LEN)realloc_alignment  ( A, 2*LEN);       
+
+       
+       aln=A->seq_al;
+       
+       for ( c=0; c<2; c++)
+           for ( a=0; a<ns[c]; a++)
+               {
+                 A->order[l_s[c][a]][2]=(c==0)?last_i:last_j;
+                 A->order[l_s[c][a]][3]=(c==1)?best_i:best_j;
+                 
+                 e=(c==0)?last_i:last_j;
+                 for ( d=0; d<e; d++)
+                   {
+                     A->order[l_s[c][a]][1]+=1-is_gap(aln[l_s[c][a]][d]);
+                   }
+               }
+       
+                   
+       for ( c=0; c< 2; c++)
+           {
+           for ( a=0; a< ns[c]; a++) 
+               {
+               aln[l_s[c][a]]+=(c==0)?last_i:last_j;
+               ch=0;
+               for ( b=0; b< LEN; b++)
+                   {
+                  
+                   if (al[c][b]==1)
+                       char_buf[b]=aln[l_s[c][a]][ch++];
+                   else
+                       char_buf[b]='-';
+                  }
+               char_buf[b]='\0';
+               aln[l_s[c][a]]-=(c==0)?last_i:last_j;
+               sprintf (aln[l_s[c][a]],"%s", char_buf);
+               }
+            }
+       
+       
+       A->len_aln=LEN;
+       
+       free_int (pos0, -1);
+       vfree ( cc);
+       vfree (dd);             
+       vfree (ddg);
+       vfree (buffer);
+       vfree (char_buf); 
+       vfree (buf_trace);
+       if ( long_trace_flag)fclose (long_trace);       
+       
+               
+       return best_score;
+       }
+
+
+/*******************************************************************************/
+/*                AUTOMATIC GEP+SCALE PENALTY FOR SW                           */
+/*                                                                             */
+/*                                                                            */
+/*                                                                             */
+/*                                                                            */
+/*******************************************************************************/
+
+Alignment * add_seq2aln   (Constraint_list *CL, Alignment *IN,Sequence  *S)
+           {   
+          int *n_groups;
+          int **group_list;
+          int a;
+          static int series=0;
+         
+         
+         
+         
+          int ste; /*sequence to extract, last one if they are packed*/
+
+
+
+
+
+          if (CL->packed_seq_lu){ste=S->nseq-1;}
+          else{ste=0;}
+
+          if ( IN==NULL)
+             {
+             IN=realloc_aln2(IN, 1, strlen (S->seq[ste])+1);
+             IN->S=S;
+             IN->nseq=1;
+             
+             
+             
+             sprintf ( IN->seq_al[0], "%s", S->seq[ste]);
+             sprintf (IN->name[0], "%s_%d_1", S->name[ste],series);
+             IN->order[0][0]=ste;
+             IN->order[0][1]=0;
+             
+             IN->len_aln=strlen ( IN->seq_al[0]);
+             series++;
+           
+             }    
+          else
+             {
+             
+             IN=realloc_aln2 ( IN, IN->nseq+1,MAX(strlen ( S->seq[ste])+1, IN->len_aln+1));
+             n_groups=(int*)vcalloc ( 2, sizeof (int));
+             group_list=declare_int (2,IN->nseq+1);
+             
+             n_groups[0]=IN->nseq;
+             for ( a=0; a<IN->nseq; a++)group_list[0][a]=a;
+             
+             n_groups[1]=1;
+             group_list[1][0]=IN->nseq;
+             sprintf (IN->name[IN->nseq], "%s_%d_%d",S->name[ste],series,IN->nseq+1);
+             sprintf (IN->seq_al[IN->nseq], "%s",S->seq[ste]);
+             IN->order[IN->nseq][0]=ste;
+             IN->order[IN->nseq][1]=0;
+             IN->nseq++;
+             
+            
+             pair_wise ( IN, n_groups, group_list,CL); 
+             
+             }
+         
+          return IN;
+             
+          }
+          
+