#include #include #include #include #include #include "io_lib_header.h" #include "util_lib_header.h" #include "define_header.h" #include "dp_lib_header.h" int domain_pair_wise (Alignment *A,int*in_ns, int **in_l_s,Constraint_list *CL ) { /*******************************************************************************/ /* SEQ_DOMAIN DP */ /* */ /* makes DP between the the ns[0] sequences and the ns[1] */ /* */ /* for MODE, see the function get_dp_cost */ /*******************************************************************************/ int scale, gop, gep, maximise; int a, b, i, j,l,x; int best_j; int lenal[2], len; int sub; int match; int *ns, **l_s; int *f; int *pf; int *dd; int *dd_len; int * e; int *pe; int * e_len; int *pe_len; int fop; int **pos0; int **al=NULL; int **pos_al=NULL; int ala,LEN; char *buffer; char *char_buf; /*trace back variables */ TRACE_TYPE *buf_trace=NULL; TRACE_TYPE **trace; TRACE_TYPE k; TRACE_TYPE *tr; int **result_aln; int nseq; /*Test Varaibles*/ int score; /*Prepare l_s and ns*/ ns=vcalloc( 2, sizeof (int)); l_s=vcalloc ( 2, sizeof (int*)); ns[0]=in_ns[1]; ns[1]=in_ns[0]; l_s[0]=in_l_s[1]; l_s[1]=in_l_s[0]; lenal[0]=strlen (A->seq_al[l_s[0][0]]); lenal[1]=strlen (A->seq_al[l_s[1][0]]); len=lenal[0]+lenal[1]+2; /********************************/ gop=(CL->gop)*SCORE_K; gep=(CL->gep)*SCORE_K; maximise=CL->maximise; scale=(lenal[1]*SCORE_K*(CL->moca)->moca_scale); /*******************************/ /*DO MEMORY ALLOCATION FOR DP*/ buf_trace=vcalloc ( len, sizeof (TRACE_TYPE)); buffer=vcalloc ( 2*len, sizeof (char)); al =declare_int (2, 2*len); pos_al=declare_int (2, 2*len); result_aln=declare_int (1,len); char_buf= vcalloc (2*len, sizeof (char)); f =vcalloc (len, sizeof (int)); pf =vcalloc (len, sizeof (int)); e =vcalloc (len, sizeof (int)); pe =vcalloc (len, sizeof (int)); e_len =vcalloc (len, sizeof (int)); pe_len =vcalloc (len, sizeof (int)); dd =vcalloc (len, sizeof (int)); dd_len=vcalloc (len, sizeof (int)); trace=declare_int (lenal[0]+2, lenal[1]+2); /*END OF MEMORY ALLOCATION*/ /* 0(s) +(dd) \ | \ | \ | \ | \ | \ | \| -(e)----O */ pos0=aln2pos_simple ( A,-1, ns, l_s); for ( i=0; i<=lenal[0]+1; i++) { tr=trace[i]; for ( sub=0,j=0; j<=lenal[1]; j++) { if (i==0 && j==0){tr[j]=1;pe[j]=dd[j]=gop;} else if (i==0) {e[j]=pe[j]=dd[j]=gop;dd_len[j]=e_len[j]=pe_len[j]=f[j]=pf[j]=0;tr[j]=-1;} else if (j==0) { for (f[j]=pf[0],best_j=0,a=1; a<=lenal[1]; a++) { if (f[j]!=MAX(pf[a]+scale,f[j])) { f[j]=pf[a]+scale; best_j=a; } } dd [j]=e[j]=pe[j]=gop; dd_len[j]=e_len[j]=0; tr [j]=best_j; } else if (i>lenal[0]); else { sub=(CL->get_dp_cost) (A, pos0, ns[0], l_s[0], i-1, pos0, ns[1], l_s[1],j-1,CL); match=pf[j-1]+sub; if (a_better_than_b(pf[j]+gop, pe[j]+gep,maximise)) { e [j]=pf[j]+gop; e_len[j]=1; } else { e [j]=pe [j]+gep; e_len[j]=pe_len[j]+1; } if (a_better_than_b(f[j-1]+gop, dd[j-1]+gep,maximise)) { dd [j]=f[j-1]+gop; dd_len[j]=1; } else { dd [j]=dd [j-1]+gep; dd_len[j]=dd_len[j-1]+1 ; } if ( sub!=UNDEFINED) { f[j] =best_int(4,maximise,&fop,e[j],match,dd[j],f[0]); fop-=1; if (fop==-1)fop= e_len[j]*fop; else if (fop==1 )fop=dd_len[j]*fop; else if (fop==2 )fop=UNDEFINED; } else { dd[j]=e[j]=match=-10000; f[j]=f[0]; fop=UNDEFINED; } pe [j] =e [j]; pf [j-1]=f [j-1]; pe_len[j] =e_len[j]; tr[j] =fop; } } pf [j-1]=f [j-1]; pe [j] =e [j]; pe_len[j] =e_len[j]; } score=f[0]; i=lenal[0]+1; j=0; ala=0; while (i!=0) { k=trace[i][j]; if (j==0 && i<=lenal[0]) { pos_al[0][ala]=i; pos_al[1][ala]=j; al[0][ala]=MATCH; al[1][ala]=UNALIGNED; i--; j=k; ala++; } else if ( j==0 && i>lenal[0]) { j=k; i--; } else if (k==0) { pos_al[0][ala]=i; pos_al[1][ala]=j; al[0][ala]=MATCH; al[1][ala]=MATCH; i--; j--; ala++; } else if (k!=UNDEFINED && k<0 && i>0) { for (x=0; x< -k && i>0; x++) { pos_al[0][ala]=i; pos_al[1][ala]=j; al[0][ala]=MATCH; al[1][ala]=GAP; i--; ala++; } } else if (k!=UNDEFINED && k>0 && j>0) { for ( x=0; x< k && j>0; x++) { pos_al[0][ala]=i; pos_al[1][ala]=j; al[0][ala]=GAP; al[1][ala]=MATCH; j--; ala++; } } else if ( k==UNDEFINED){j=0;} } LEN=ala; invert_list_int ( pos_al[0], LEN); invert_list_int ( pos_al[1], LEN); invert_list_int ( al[0], LEN); invert_list_int ( al[1], LEN); /*O: TARGET SEQUENCE (long)*/ /*1: PATTERN SEQUENCE (short)*/ for ( b=0; b0) { result_aln=realloc_int ( result_aln, read_size_int ( result_aln,sizeof (int*)),len, 1, 0); nseq++; l=0; for ( b=0; b0)nseq++; A=domain_match_list2aln ( A,ns,l_s,result_aln,nseq,lenal[1]); vfree (f); vfree (pf); vfree (e); vfree (pe); vfree (e_len); vfree (pe_len); vfree (dd_len); vfree (dd); free_int (pos0, -1); vfree (buffer); vfree (char_buf); vfree (buf_trace); free_int ( pos_al, -1); pos_al=NULL; free_int ( al, -1); free_int (trace,-1); free_int ( result_aln, -1); return score; } Alignment *domain_match_list2aln ( Alignment *A,int *ns,int **l_s,int **ml, int nseq, int len) { /* function documentation: start This function edits the alignment given the results obtained by DP ns: ns[0]->number of sequences serarched (TARGET) ns[1]->number of sequences in the pattern (PATTERN SEQ) l_s: l_s[0]->list of sequences in the TARGET... nseq: number of occurences of PATTERN in TARGET len: length of the PATTERN ml: detail of the nseq matches ml[x][y]=k-> residue k of TARGET matches residue y of pattern -> k=-1 means a gap; NOTE: This implementation can only match ONE target sequence with the PATTERN The Pattern can either be one sequence or a profile. function documentation: end */ int a, b, c, d, e; Alignment *B=NULL; int **new_ml; int *max_ml; int *start_ml; int tot_nseq; int max_len,seq; char *buf; if ( len==0 || nseq==0) { A->nseq=0; A->len_aln=0; } else { B=copy_aln(A, B); /*1 Extract the sequence used as a pattern, put it on the top*/ A=shrink_aln (A, ns[1], l_s[1]); A=realloc_aln2(A, ns[1]+ns[0]*nseq,len+A->len_aln+1); new_ml =declare_int ( nseq, 3*len); max_ml =vcalloc ( nseq, sizeof (int)); for ( a=0; a=0) { new_ml[c][b]=ml[c][a]; if ( max_ml[c]<0) start_ml[c]=max_ml[c]=ml[c][a]; for ( d=a+1; d=0){ max_ml[c]= ml[c][d];break;}} if (max_ml[c]!=new_ml[c][b]) { new_ml[c][b+1]=max_ml[c]-new_ml[c][b]-1; } max_len=MAX( max_len, new_ml[c][b+1]); } else { new_ml[c][b]=ml[c][a]; new_ml[c][b+1]=0; } } for ( c=0; c< nseq; c++){new_ml[c][b+2]=max_len;} } tot_nseq=ns[1]+ns[0]*nseq; for ( a=0, b=0; a< len ;a++) { /*1: Place the Match Column*/ for ( c=0; c< ns[1]; c++) { A->seq_al[c][b]=B->seq_al[l_s[1][c]][a]; A->seq_al[c][b+1]='\0'; } for ( e=0,c=ns[1]; cseq_al[c+d][b]=B->seq_al[l_s[0][d]][new_ml[e][3*a]]; else A->seq_al[c+d][b]='-'; A->seq_al[c+d][b+1]='\0'; } b++; /*2: Add the Gaps before the next_column*/ if ( new_ml[0][3*a+2]>0) { for ( c=0; c< ns[1]; c++) { buf=generate_null(new_ml[0][3*a+2]); strcat ( A->seq_al[c],buf); vfree (buf); } for (e=0,c=ns[1];c< tot_nseq; c+=ns[0], e++) { buf=extract_char (B->seq_al[l_s[0][0]], new_ml[e][3*a]+1, new_ml[e][3*a+1]); strcat ( A->seq_al[c],buf); vfree (buf); buf=generate_null(new_ml[e][3*a+2]-new_ml[e][3*a+1]); strcat ( A->seq_al[c],buf); vfree (buf); } } b+=new_ml[0][3*a+2]; } for (e=0,a=ns[1]; a< tot_nseq; a+=ns[0],e++) { for ( b=0; border[a+b][0]=B->order[seq][0]; A->order[a+b][1]=B->order[seq][1]; for ( c=0; corder[a+b][1]+=!is_gap(B->seq_al[seq][c]); sprintf ( A->name[a+b], "Repeat_%d", a+b); } } free_aln(B); A->nseq=tot_nseq; A->len_aln=strlen ( A->seq_al[0]); } return A; } Alignment * domain_seq2domain (Constraint_list *CL,int scale,int gop,int gep,Alignment *SEQ_DOMAIN, Alignment *TARGET) { static Alignment *A; int *n_groups; int **group_list; int a,b,c; A=copy_aln (TARGET, A); A=stack_aln( A, SEQ_DOMAIN); n_groups=vcalloc ( 2, sizeof (int)); group_list=declare_int (2, A->nseq); n_groups[0]=TARGET->nseq; n_groups[1]=SEQ_DOMAIN->nseq; for (c=0, a=0; a< 2; a++) { for (b=0; b< n_groups[a]; b++, c++) { group_list[a][b]=c; } } A->score_aln=domain_pair_wise (A, n_groups, group_list,CL); SEQ_DOMAIN=copy_aln (A, SEQ_DOMAIN); vfree (n_groups); free_int (group_list,-1); return SEQ_DOMAIN; } /******************************COPYRIGHT NOTICE*******************************/ /*© Centro de Regulacio Genomica */ /*and */ /*Cedric Notredame */ /*Fri Feb 18 08:27:45 CET 2011 - Revision 596. */ /*All rights reserved.*/ /*This file is part of T-COFFEE.*/ /**/ /* T-COFFEE is free software; you can redistribute it and/or modify*/ /* it under the terms of the GNU General Public License as published by*/ /* the Free Software Foundation; either version 2 of the License, or*/ /* (at your option) any later version.*/ /**/ /* T-COFFEE is distributed in the hope that it will be useful,*/ /* but WITHOUT ANY WARRANTY; without even the implied warranty of*/ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/ /* GNU General Public License for more details.*/ /**/ /* You should have received a copy of the GNU General Public License*/ /* along with Foobar; if not, write to the Free Software*/ /* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/ /*............................................... |*/ /* If you need some more information*/ /* cedric.notredame@europe.com*/ /*............................................... |*/ /**/ /**/ /* */ /******************************COPYRIGHT NOTICE*******************************/