new version of tcoffee 8.99 not yet compiled for ia32 linux (currently compiled for...
[jabaws.git] / binaries / src / tcoffee / t_coffee_source / util_domain_dp.c
diff --git a/binaries/src/tcoffee/t_coffee_source/util_domain_dp.c b/binaries/src/tcoffee/t_coffee_source/util_domain_dp.c
new file mode 100644 (file)
index 0000000..598c934
--- /dev/null
@@ -0,0 +1,569 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <stdarg.h>
+#include <string.h>
+
+#include "io_lib_header.h"
+#include "util_lib_header.h"
+#include "define_header.h"
+
+#include "dp_lib_header.h"
+
+int domain_pair_wise (Alignment *A,int*in_ns, int **in_l_s,Constraint_list *CL )
+       {
+/*******************************************************************************/
+/*                SEQ_DOMAIN DP                                                    */
+/*                                                                             */
+/*     makes DP between the the ns[0] sequences and the ns[1]                 */
+/*                                                                             */
+/*     for MODE, see the function  get_dp_cost                                */
+/*******************************************************************************/
+       
+       int scale, gop, gep, maximise; 
+       int a, b, i, j,l,x;
+       int best_j;
+
+       int lenal[2], len;
+       int sub;
+       int match;
+       
+       int *ns, **l_s;
+
+
+       int *f;
+       int *pf;
+
+       int *dd;
+       int *dd_len;
+       
+       int * e;
+       int *pe;
+       int * e_len;
+       int *pe_len;
+           
+       int fop;
+        int **pos0;
+       
+       int  **al=NULL;
+       int  **pos_al=NULL;
+
+
+       
+       int ala,LEN;
+       char *buffer;
+       char *char_buf;
+
+
+/*trace back variables       */
+       TRACE_TYPE *buf_trace=NULL;
+       TRACE_TYPE **trace;
+       TRACE_TYPE k;
+       TRACE_TYPE *tr;
+       int **result_aln;
+       int nseq;
+/*Test Varaibles*/
+       int score;
+
+/*Prepare l_s and ns*/
+
+       
+       ns=vcalloc( 2, sizeof (int));
+       l_s=vcalloc ( 2, sizeof (int*));
+       
+       ns[0]=in_ns[1];
+       ns[1]=in_ns[0];
+         
+       l_s[0]=in_l_s[1];
+       l_s[1]=in_l_s[0];
+       lenal[0]=strlen (A->seq_al[l_s[0][0]]);
+       lenal[1]=strlen (A->seq_al[l_s[1][0]]);
+       len=lenal[0]+lenal[1]+2;
+/********************************/     
+gop=(CL->gop)*SCORE_K;
+gep=(CL->gep)*SCORE_K;
+maximise=CL->maximise;
+scale=(lenal[1]*SCORE_K*(CL->moca)->moca_scale);
+/*******************************/
+
+       
+
+/*DO MEMORY ALLOCATION FOR DP*/
+       
+
+       
+
+
+       buf_trace=vcalloc ( len, sizeof (TRACE_TYPE));  
+       buffer=vcalloc ( 2*len, sizeof (char)); 
+       
+        al    =declare_int  (2, 2*len);  
+       pos_al=declare_int  (2, 2*len);
+       result_aln=declare_int (1,len);
+       char_buf= vcalloc (2*len, sizeof (char));       
+
+       f     =vcalloc (len, sizeof (int));
+       pf     =vcalloc (len, sizeof (int));
+       e     =vcalloc (len, sizeof (int));
+       pe     =vcalloc (len, sizeof (int));
+       e_len =vcalloc (len, sizeof (int));
+       pe_len =vcalloc (len, sizeof (int));
+       dd    =vcalloc (len, sizeof (int));
+       dd_len=vcalloc (len, sizeof (int));
+
+       
+       trace=declare_int (lenal[0]+2, lenal[1]+2);
+
+/*END OF MEMORY ALLOCATION*/
+       
+       
+               /*
+               0(s)   +(dd)
+                 \      |
+                  \     |
+                   \    |
+                    \   |
+                     \  |
+                      \ |
+                       \|
+               -(e)----O
+               */ 
+       
+       pos0=aln2pos_simple ( A,-1, ns, l_s);   
+
+       for ( i=0; i<=lenal[0]+1; i++)
+           {       
+           tr=trace[i];
+
+           
+           for ( sub=0,j=0; j<=lenal[1]; j++)
+               {
+               if      (i==0 && j==0){tr[j]=1;pe[j]=dd[j]=gop;}
+               else if (i==0)        {e[j]=pe[j]=dd[j]=gop;dd_len[j]=e_len[j]=pe_len[j]=f[j]=pf[j]=0;tr[j]=-1;}
+               else if (j==0)
+                    {
+                    for (f[j]=pf[0],best_j=0,a=1; a<=lenal[1]; a++)
+                        {
+                        if (f[j]!=MAX(pf[a]+scale,f[j]))
+                            {
+                            f[j]=pf[a]+scale;
+                            best_j=a;
+                            }                  
+                        }
+                    
+
+                    dd    [j]=e[j]=pe[j]=gop;
+                    dd_len[j]=e_len[j]=0;
+                    tr    [j]=best_j;
+                    
+                    }
+               else if (i>lenal[0]);
+               else
+                    {
+                    sub=(CL->get_dp_cost) (A, pos0, ns[0], l_s[0], i-1, pos0, ns[1], l_s[1],j-1,CL);
+                    
+                 
+                    
+                    match=pf[j-1]+sub;
+                    if (a_better_than_b(pf[j]+gop, pe[j]+gep,maximise))
+                        {
+                        e    [j]=pf[j]+gop;
+                        e_len[j]=1;
+                        }
+                    else 
+                        {
+                        e    [j]=pe    [j]+gep;
+                        e_len[j]=pe_len[j]+1;
+                        }
+                    
+                    
+                    if (a_better_than_b(f[j-1]+gop, dd[j-1]+gep,maximise))
+                       {
+                       dd    [j]=f[j-1]+gop;
+                       dd_len[j]=1;
+                       }
+                    else 
+                       {
+                       dd    [j]=dd    [j-1]+gep;
+                       dd_len[j]=dd_len[j-1]+1  ;
+                       }
+                    
+
+
+                    if ( sub!=UNDEFINED)
+                        {                       
+                        
+                        f[j] =best_int(4,maximise,&fop,e[j],match,dd[j],f[0]);
+                        fop-=1;
+                        
+                        if      (fop==-1)fop= e_len[j]*fop;
+                        else if (fop==1 )fop=dd_len[j]*fop;
+                        else if (fop==2 )fop=UNDEFINED;
+                        
+                        
+                        
+                        }
+                    else
+                        {
+                        dd[j]=e[j]=match=-10000;                        
+                        f[j]=f[0];
+                        fop=UNDEFINED;
+                        }
+                    pe    [j]  =e    [j];
+                    pf    [j-1]=f    [j-1];
+                    pe_len[j]  =e_len[j];
+                    tr[j]      =fop;   
+                    }
+       
+               }
+           
+           pf    [j-1]=f    [j-1];
+           pe    [j]  =e    [j];
+           pe_len[j]  =e_len[j];
+           }
+       score=f[0];
+        i=lenal[0]+1;
+       j=0;
+       ala=0;
+       
+       
+       while (i!=0)
+             {
+            
+
+             k=trace[i][j];
+             if (j==0 && i<=lenal[0])
+                {
+                
+                pos_al[0][ala]=i;
+                pos_al[1][ala]=j;
+                al[0][ala]=MATCH;
+                al[1][ala]=UNALIGNED;
+                i--;
+                j=k;
+                ala++;
+                }
+             else if ( j==0 && i>lenal[0])
+                {
+                j=k;
+                i--; 
+                
+                }
+             else if (k==0)
+                {              
+                pos_al[0][ala]=i;
+                pos_al[1][ala]=j;
+                al[0][ala]=MATCH;
+                al[1][ala]=MATCH;
+                i--;
+                j--;
+               
+                ala++;
+                }              
+             else if (k!=UNDEFINED && k<0 && i>0)
+                {
+                for (x=0; x< -k && i>0; x++)
+                    {
+                    pos_al[0][ala]=i;
+                    pos_al[1][ala]=j;    
+                    al[0][ala]=MATCH;
+                    al[1][ala]=GAP;             
+                    i--;
+                    ala++;
+                    }
+                }
+             else if (k!=UNDEFINED && k>0 && j>0)
+                {
+                for ( x=0; x< k && j>0; x++)
+                    {
+                    pos_al[0][ala]=i;
+                    pos_al[1][ala]=j;    
+                    al[0][ala]=GAP;
+                    al[1][ala]=MATCH;
+                    j--;
+                    ala++;
+                    }
+                }
+             else if ( k==UNDEFINED){j=0;}
+             
+             }
+
+       
+       LEN=ala;        
+
+       invert_list_int ( pos_al[0], LEN);
+       invert_list_int ( pos_al[1], LEN);              
+       invert_list_int (     al[0], LEN);
+       invert_list_int (     al[1], LEN);      
+
+       
+       /*O: TARGET SEQUENCE  (long)*/
+       /*1: PATTERN SEQUENCE (short)*/
+
+       
+       
+       for ( b=0; b<lenal[1]; b++)result_aln[0][b]=-1;
+       for (l=0, nseq=0, a=0; a< LEN; a++)
+           {
+           i=pos_al[0][a];
+           j=pos_al[1][a];
+           
+           if (al[1][a]==UNALIGNED && l>0)
+                    {
+                    result_aln=realloc_int ( result_aln, read_size_int ( result_aln,sizeof (int*)),len, 1, 0);            
+                    nseq++;
+                    l=0;
+                    for ( b=0; b<lenal[1]; b++)result_aln[nseq][b]=-1;
+                    }
+           
+           else if (al[1][a]==MATCH && al[0][a]==MATCH ){l++;result_aln[nseq][j-1]=i-1;}
+           else if (al[1][a]==MATCH && al[0][a]==GAP   ){l++;result_aln[nseq][j-1]=-1;}
+           
+           }
+       if ( l>0)nseq++;
+
+
+
+
+       A=domain_match_list2aln ( A,ns,l_s,result_aln,nseq,lenal[1]); 
+
+       
+       vfree (f);
+       vfree (pf);
+       vfree (e);
+       vfree (pe);
+       vfree (e_len);
+       vfree (pe_len);
+       vfree (dd_len);
+       vfree (dd);
+       free_int (pos0, -1);
+       vfree (buffer);
+       vfree (char_buf); 
+       vfree (buf_trace);
+       free_int ( pos_al, -1);
+        pos_al=NULL;
+         
+       free_int (     al, -1);
+
+       free_int (trace,-1);
+       free_int ( result_aln, -1);
+       return score;
+       }
+
+
+Alignment *domain_match_list2aln ( Alignment *A,int *ns,int **l_s,int **ml, int nseq, int len)
+           {
+
+            /*
+              function documentation: start
+              
+              This function edits the alignment given the results obtained by DP
+              ns: ns[0]->number of sequences serarched (TARGET)
+                  ns[1]->number of sequences in the pattern (PATTERN SEQ)
+              l_s:   
+                  l_s[0]->list of sequences in the TARGET...
+              
+              nseq: number of occurences of PATTERN in TARGET
+              len:  length of the PATTERN
+              
+              ml: detail of the nseq matches
+                  ml[x][y]=k-> residue k of TARGET matches residue y of pattern
+                            -> k=-1 means a gap;
+                            
+              NOTE: This implementation can only match ONE target sequence with the PATTERN
+              The Pattern can either be one sequence or a profile.
+
+              function documentation: end
+            */
+
+            
+            
+
+          int a, b, c, d, e;
+          Alignment *B=NULL;
+          int **new_ml;
+          int  *max_ml;
+          int  *start_ml;
+          int tot_nseq;
+          int max_len,seq;
+          char *buf;
+          
+          if ( len==0 || nseq==0)
+             {
+             A->nseq=0;
+             A->len_aln=0;
+             }
+          else
+             {
+             B=copy_aln(A, B);
+             /*1 Extract the sequence used as a pattern, put it on the top*/
+             
+           
+             A=shrink_aln (A, ns[1], l_s[1]);
+             A=realloc_aln2(A, ns[1]+ns[0]*nseq,len+A->len_aln+1);
+             
+             
+             
+             new_ml  =declare_int ( nseq, 3*len);
+             max_ml  =vcalloc ( nseq, sizeof (int));
+             for ( a=0; a<nseq; a++)max_ml[a]=-1;
+
+             start_ml=vcalloc ( nseq, sizeof (int));
+
+             for ( b=0,a=0; a< len; a++, b+=3)
+                 {
+                 for ( max_len=0,c=0; c< nseq; c++)
+                     {
+                     if ( max_ml[c]<0 && ml[c][a]<0)
+                        {
+                        new_ml[c][b]=ml[c][a];
+                        new_ml[c][b+1]=0;
+                        }
+                     else if ( ml[c][a]>=0)
+                        {                     
+                        new_ml[c][b]=ml[c][a];
+                        if ( max_ml[c]<0) start_ml[c]=max_ml[c]=ml[c][a];
+                        for ( d=a+1; d<len; d++){if ( ml[c][d]>=0){ max_ml[c]= ml[c][d];break;}}
+                        if (max_ml[c]!=new_ml[c][b])
+                           {
+                           new_ml[c][b+1]=max_ml[c]-new_ml[c][b]-1;
+                           }
+                        max_len=MAX( max_len, new_ml[c][b+1]);
+                        }
+                     else
+                        {
+                        new_ml[c][b]=ml[c][a];
+                        new_ml[c][b+1]=0;
+                        }
+                     }
+              
+                 for ( c=0; c< nseq; c++){new_ml[c][b+2]=max_len;}
+                 }
+         
+             tot_nseq=ns[1]+ns[0]*nseq;
+          
+             for ( a=0, b=0; a< len ;a++)
+                 {
+                
+                 /*1: Place the Match Column*/
+                 for ( c=0; c< ns[1]; c++)
+                     {
+                     A->seq_al[c][b]=B->seq_al[l_s[1][c]][a];
+                     A->seq_al[c][b+1]='\0';              
+                     }
+                 for ( e=0,c=ns[1]; c<tot_nseq; c+=ns[0],e++)
+                     for ( d=0; d< ns[0]; d++)
+                         {
+                         if ( new_ml[e][3*a]!=-1)
+                           A->seq_al[c+d][b]=B->seq_al[l_s[0][d]][new_ml[e][3*a]];
+                         else 
+                             A->seq_al[c+d][b]='-';
+                         A->seq_al[c+d][b+1]='\0';
+                      
+                         }
+                 b++;
+
+                /*2: Add the Gaps before the next_column*/
+                 if ( new_ml[0][3*a+2]>0)
+                    {
+                    for ( c=0; c< ns[1]; c++)
+                        {
+                        buf=generate_null(new_ml[0][3*a+2]);
+                        strcat ( A->seq_al[c],buf);
+                        vfree (buf);
+                        }
+                    for (e=0,c=ns[1];c< tot_nseq; c+=ns[0], e++)
+                        {
+                          buf=extract_char (B->seq_al[l_s[0][0]], new_ml[e][3*a]+1, new_ml[e][3*a+1]);
+                          strcat ( A->seq_al[c],buf);
+                          vfree (buf);
+                          buf=generate_null(new_ml[e][3*a+2]-new_ml[e][3*a+1]);
+                          strcat ( A->seq_al[c],buf);
+                          
+                          vfree (buf);
+                          
+                        }
+                    }
+                 b+=new_ml[0][3*a+2];
+                 }
+             
+             for (e=0,a=ns[1]; a< tot_nseq; a+=ns[0],e++)
+                 {
+                 for ( b=0; b<ns[0]; b++)
+                     {
+                     seq=l_s[0][b];
+                     A->order[a+b][0]=B->order[seq][0];
+                     A->order[a+b][1]=B->order[seq][1];
+                     for ( c=0; c<start_ml[e];c++)A->order[a+b][1]+=!is_gap(B->seq_al[seq][c]);
+                     sprintf ( A->name[a+b], "Repeat_%d", a+b);
+                     }
+                 }
+       
+             free_aln(B);
+             A->nseq=tot_nseq;
+             A->len_aln=strlen ( A->seq_al[0]);
+             
+             }
+          return A;
+          }
+Alignment * domain_seq2domain (Constraint_list *CL,int scale,int gop,int gep,Alignment *SEQ_DOMAIN, Alignment *TARGET) 
+          {    
+          static Alignment *A;
+          int *n_groups;
+          int **group_list;
+          int a,b,c;
+          
+          A=copy_aln (TARGET, A);
+          A=stack_aln( A, SEQ_DOMAIN);
+
+          
+          n_groups=vcalloc ( 2, sizeof (int));
+          group_list=declare_int (2, A->nseq);
+          
+          n_groups[0]=TARGET->nseq;
+          n_groups[1]=SEQ_DOMAIN->nseq;
+          for (c=0, a=0; a< 2; a++)
+              {
+              for (b=0; b< n_groups[a]; b++, c++)
+                  {
+                  group_list[a][b]=c;
+                  }
+              }           
+          A->score_aln=domain_pair_wise (A, n_groups, group_list,CL);
+          
+          SEQ_DOMAIN=copy_aln (A, SEQ_DOMAIN);
+          vfree (n_groups);
+          free_int (group_list,-1);
+          return SEQ_DOMAIN;
+          }
+          
+
+/******************************COPYRIGHT NOTICE*******************************/
+/*© Centro de Regulacio Genomica */
+/*and */
+/*Cedric Notredame */
+/*Fri Feb 18 08:27:45 CET 2011 - Revision 596. */
+/*All rights reserved.*/
+/*This file is part of T-COFFEE.*/
+/**/
+/*    T-COFFEE is free software; you can redistribute it and/or modify*/
+/*    it under the terms of the GNU General Public License as published by*/
+/*    the Free Software Foundation; either version 2 of the License, or*/
+/*    (at your option) any later version.*/
+/**/
+/*    T-COFFEE is distributed in the hope that it will be useful,*/
+/*    but WITHOUT ANY WARRANTY; without even the implied warranty of*/
+/*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the*/
+/*    GNU General Public License for more details.*/
+/**/
+/*    You should have received a copy of the GNU General Public License*/
+/*    along with Foobar; if not, write to the Free Software*/
+/*    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA*/
+/*...............................................                                                                                      |*/
+/*  If you need some more information*/
+/*  cedric.notredame@europe.com*/
+/*...............................................                                                                                                                                     |*/
+/**/
+/**/
+/*     */
+/******************************COPYRIGHT NOTICE*******************************/