7 #include "io_lib_header.h"
8 #include "util_lib_header.h"
9 #include "define_header.h"
11 #include "dp_lib_header.h"
13 int domain_pair_wise (Alignment *A,int*in_ns, int **in_l_s,Constraint_list *CL )
15 /*******************************************************************************/
18 /* makes DP between the the ns[0] sequences and the ns[1] */
20 /* for MODE, see the function get_dp_cost */
21 /*******************************************************************************/
23 int scale, gop, gep, maximise;
58 /*trace back variables */
59 TRACE_TYPE *buf_trace=NULL;
68 /*Prepare l_s and ns*/
71 ns=vcalloc( 2, sizeof (int));
72 l_s=vcalloc ( 2, sizeof (int*));
79 lenal[0]=strlen (A->seq_al[l_s[0][0]]);
80 lenal[1]=strlen (A->seq_al[l_s[1][0]]);
81 len=lenal[0]+lenal[1]+2;
82 /********************************/
83 gop=(CL->gop)*SCORE_K;
84 gep=(CL->gep)*SCORE_K;
85 maximise=CL->maximise;
86 scale=(lenal[1]*SCORE_K*(CL->moca)->moca_scale);
87 /*******************************/
91 /*DO MEMORY ALLOCATION FOR DP*/
97 buf_trace=vcalloc ( len, sizeof (TRACE_TYPE));
98 buffer=vcalloc ( 2*len, sizeof (char));
100 al =declare_int (2, 2*len);
101 pos_al=declare_int (2, 2*len);
102 result_aln=declare_int (1,len);
103 char_buf= vcalloc (2*len, sizeof (char));
105 f =vcalloc (len, sizeof (int));
106 pf =vcalloc (len, sizeof (int));
107 e =vcalloc (len, sizeof (int));
108 pe =vcalloc (len, sizeof (int));
109 e_len =vcalloc (len, sizeof (int));
110 pe_len =vcalloc (len, sizeof (int));
111 dd =vcalloc (len, sizeof (int));
112 dd_len=vcalloc (len, sizeof (int));
115 trace=declare_int (lenal[0]+2, lenal[1]+2);
117 /*END OF MEMORY ALLOCATION*/
132 pos0=aln2pos_simple ( A,-1, ns, l_s);
134 for ( i=0; i<=lenal[0]+1; i++)
139 for ( sub=0,j=0; j<=lenal[1]; j++)
141 if (i==0 && j==0){tr[j]=1;pe[j]=dd[j]=gop;}
142 else if (i==0) {e[j]=pe[j]=dd[j]=gop;dd_len[j]=e_len[j]=pe_len[j]=f[j]=pf[j]=0;tr[j]=-1;}
145 for (f[j]=pf[0],best_j=0,a=1; a<=lenal[1]; a++)
147 if (f[j]!=MAX(pf[a]+scale,f[j]))
155 dd [j]=e[j]=pe[j]=gop;
156 dd_len[j]=e_len[j]=0;
160 else if (i>lenal[0]);
163 sub=(CL->get_dp_cost) (A, pos0, ns[0], l_s[0], i-1, pos0, ns[1], l_s[1],j-1,CL);
168 if (a_better_than_b(pf[j]+gop, pe[j]+gep,maximise))
176 e_len[j]=pe_len[j]+1;
180 if (a_better_than_b(f[j-1]+gop, dd[j-1]+gep,maximise))
188 dd_len[j]=dd_len[j-1]+1 ;
196 f[j] =best_int(4,maximise,&fop,e[j],match,dd[j],f[0]);
199 if (fop==-1)fop= e_len[j]*fop;
200 else if (fop==1 )fop=dd_len[j]*fop;
201 else if (fop==2 )fop=UNDEFINED;
208 dd[j]=e[j]=match=-10000;
235 if (j==0 && i<=lenal[0])
241 al[1][ala]=UNALIGNED;
246 else if ( j==0 && i>lenal[0])
263 else if (k!=UNDEFINED && k<0 && i>0)
265 for (x=0; x< -k && i>0; x++)
275 else if (k!=UNDEFINED && k>0 && j>0)
277 for ( x=0; x< k && j>0; x++)
287 else if ( k==UNDEFINED){j=0;}
294 invert_list_int ( pos_al[0], LEN);
295 invert_list_int ( pos_al[1], LEN);
296 invert_list_int ( al[0], LEN);
297 invert_list_int ( al[1], LEN);
300 /*O: TARGET SEQUENCE (long)*/
301 /*1: PATTERN SEQUENCE (short)*/
305 for ( b=0; b<lenal[1]; b++)result_aln[0][b]=-1;
306 for (l=0, nseq=0, a=0; a< LEN; a++)
311 if (al[1][a]==UNALIGNED && l>0)
313 result_aln=realloc_int ( result_aln, read_size_int ( result_aln,sizeof (int*)),len, 1, 0);
316 for ( b=0; b<lenal[1]; b++)result_aln[nseq][b]=-1;
319 else if (al[1][a]==MATCH && al[0][a]==MATCH ){l++;result_aln[nseq][j-1]=i-1;}
320 else if (al[1][a]==MATCH && al[0][a]==GAP ){l++;result_aln[nseq][j-1]=-1;}
328 A=domain_match_list2aln ( A,ns,l_s,result_aln,nseq,lenal[1]);
343 free_int ( pos_al, -1);
349 free_int ( result_aln, -1);
354 Alignment *domain_match_list2aln ( Alignment *A,int *ns,int **l_s,int **ml, int nseq, int len)
358 function documentation: start
360 This function edits the alignment given the results obtained by DP
361 ns: ns[0]->number of sequences serarched (TARGET)
362 ns[1]->number of sequences in the pattern (PATTERN SEQ)
364 l_s[0]->list of sequences in the TARGET...
366 nseq: number of occurences of PATTERN in TARGET
367 len: length of the PATTERN
369 ml: detail of the nseq matches
370 ml[x][y]=k-> residue k of TARGET matches residue y of pattern
373 NOTE: This implementation can only match ONE target sequence with the PATTERN
374 The Pattern can either be one sequence or a profile.
376 function documentation: end
391 if ( len==0 || nseq==0)
399 /*1 Extract the sequence used as a pattern, put it on the top*/
402 A=shrink_aln (A, ns[1], l_s[1]);
403 A=realloc_aln2(A, ns[1]+ns[0]*nseq,len+A->len_aln+1);
407 new_ml =declare_int ( nseq, 3*len);
408 max_ml =vcalloc ( nseq, sizeof (int));
409 for ( a=0; a<nseq; a++)max_ml[a]=-1;
411 start_ml=vcalloc ( nseq, sizeof (int));
413 for ( b=0,a=0; a< len; a++, b+=3)
415 for ( max_len=0,c=0; c< nseq; c++)
417 if ( max_ml[c]<0 && ml[c][a]<0)
419 new_ml[c][b]=ml[c][a];
422 else if ( ml[c][a]>=0)
424 new_ml[c][b]=ml[c][a];
425 if ( max_ml[c]<0) start_ml[c]=max_ml[c]=ml[c][a];
426 for ( d=a+1; d<len; d++){if ( ml[c][d]>=0){ max_ml[c]= ml[c][d];break;}}
427 if (max_ml[c]!=new_ml[c][b])
429 new_ml[c][b+1]=max_ml[c]-new_ml[c][b]-1;
431 max_len=MAX( max_len, new_ml[c][b+1]);
435 new_ml[c][b]=ml[c][a];
440 for ( c=0; c< nseq; c++){new_ml[c][b+2]=max_len;}
443 tot_nseq=ns[1]+ns[0]*nseq;
445 for ( a=0, b=0; a< len ;a++)
448 /*1: Place the Match Column*/
449 for ( c=0; c< ns[1]; c++)
451 A->seq_al[c][b]=B->seq_al[l_s[1][c]][a];
452 A->seq_al[c][b+1]='\0';
454 for ( e=0,c=ns[1]; c<tot_nseq; c+=ns[0],e++)
455 for ( d=0; d< ns[0]; d++)
457 if ( new_ml[e][3*a]!=-1)
458 A->seq_al[c+d][b]=B->seq_al[l_s[0][d]][new_ml[e][3*a]];
460 A->seq_al[c+d][b]='-';
461 A->seq_al[c+d][b+1]='\0';
466 /*2: Add the Gaps before the next_column*/
467 if ( new_ml[0][3*a+2]>0)
469 for ( c=0; c< ns[1]; c++)
471 buf=generate_null(new_ml[0][3*a+2]);
472 strcat ( A->seq_al[c],buf);
475 for (e=0,c=ns[1];c< tot_nseq; c+=ns[0], e++)
477 buf=extract_char (B->seq_al[l_s[0][0]], new_ml[e][3*a]+1, new_ml[e][3*a+1]);
478 strcat ( A->seq_al[c],buf);
480 buf=generate_null(new_ml[e][3*a+2]-new_ml[e][3*a+1]);
481 strcat ( A->seq_al[c],buf);
490 for (e=0,a=ns[1]; a< tot_nseq; a+=ns[0],e++)
492 for ( b=0; b<ns[0]; b++)
495 A->order[a+b][0]=B->order[seq][0];
496 A->order[a+b][1]=B->order[seq][1];
497 for ( c=0; c<start_ml[e];c++)A->order[a+b][1]+=!is_gap(B->seq_al[seq][c]);
498 sprintf ( A->name[a+b], "Repeat_%d", a+b);
504 A->len_aln=strlen ( A->seq_al[0]);
509 Alignment * domain_seq2domain (Constraint_list *CL,int scale,int gop,int gep,Alignment *SEQ_DOMAIN, Alignment *TARGET)
516 A=copy_aln (TARGET, A);
517 A=stack_aln( A, SEQ_DOMAIN);
520 n_groups=vcalloc ( 2, sizeof (int));
521 group_list=declare_int (2, A->nseq);
523 n_groups[0]=TARGET->nseq;
524 n_groups[1]=SEQ_DOMAIN->nseq;
525 for (c=0, a=0; a< 2; a++)
527 for (b=0; b< n_groups[a]; b++, c++)
532 A->score_aln=domain_pair_wise (A, n_groups, group_list,CL);
534 SEQ_DOMAIN=copy_aln (A, SEQ_DOMAIN);
536 free_int (group_list,-1);
541 /*********************************COPYRIGHT NOTICE**********************************/
542 /*© Centro de Regulacio Genomica */
544 /*Cedric Notredame */
545 /*Tue Oct 27 10:12:26 WEST 2009. */
546 /*All rights reserved.*/
547 /*This file is part of T-COFFEE.*/
549 /* T-COFFEE is free software; you can redistribute it and/or modify*/
550 /* it under the terms of the GNU General Public License as published by*/
551 /* the Free Software Foundation; either version 2 of the License, or*/
552 /* (at your option) any later version.*/
554 /* T-COFFEE is distributed in the hope that it will be useful,*/
555 /* but WITHOUT ANY WARRANTY; without even the implied warranty of*/
556 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/
557 /* GNU General Public License for more details.*/
559 /* You should have received a copy of the GNU General Public License*/
560 /* along with Foobar; if not, write to the Free Software*/
561 /* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/
562 /*............................................... |*/
563 /* If you need some more information*/
564 /* cedric.notredame@europe.com*/
565 /*............................................... |*/
569 /*********************************COPYRIGHT NOTICE**********************************/