7 #include "io_lib_header.h"
8 #include "util_lib_header.h"
9 #include "define_header.h"
11 #include "dp_lib_header.h"
13 int gotoh_pair_wise_lalign ( Alignment *A, int*ns, int **l_s,Constraint_list *CL)
19 BUF=copy_aln (A, BUF);
22 for ( a=0; a<CL->lalign_n_top; a++)
28 A->score_aln=gotoh_pair_wise_sw (A, ns, l_s, CL);
29 EA=fast_coffee_evaluate_output (A, CL);
31 output_format_aln (CL->out_aln_format[0],A,EA,"stdout");
32 CL=undefine_sw_aln ( A, CL);
37 Constraint_list * undefine_sw_aln ( Alignment *A, Constraint_list *CL)
46 pos=aln2pos_simple ( A,A->nseq);
48 for ( l=0; l< A->len_aln; l++)
49 for ( a=0; a< A->nseq-1; a++)
55 for ( b=a+1; b< A->nseq;b++)
61 CL=undefine_sw_pair ( CL, rs1, r1, rs2, r2);
67 Constraint_list * undefine_sw_pair ( Constraint_list *CL, int s1, int r1, int s2, int r2)
71 if ( !CL->forbiden_pair_list)
74 CL->forbiden_pair_list=vcalloc ( (CL->S)->nseq, sizeof (int ***));
75 for ( a=0; a< ((CL->S)->nseq); a++)
77 CL->forbiden_pair_list[a]=vcalloc ( (CL->S)->nseq, sizeof (int **));
78 for ( b=0; b< ((CL->S)->nseq); b++)
79 CL->forbiden_pair_list[a][b]=vcalloc ( (CL->S)->len[a]+1, sizeof (int *));
83 if ( CL->forbiden_pair_list[s1][s2][r1]==NULL)CL->forbiden_pair_list[s1][s2][r1]=vcalloc ( (CL->S)->len[s2]+1, sizeof (int));
84 CL->forbiden_pair_list[s1][s2][r1][r2]=1;
86 if ( CL->forbiden_pair_list[s2][s1][r2]==NULL)CL->forbiden_pair_list[s2][s1][r2]=vcalloc ( (CL->S)->len[s1]+1, sizeof (int));
87 CL->forbiden_pair_list[s2][s1][r2][r1]=1;
92 int sw_pair_is_defined ( Constraint_list *CL, int s1, int r1, int s2, int r2)
100 if ( s1==s2 && d<(CL->sw_min_dist)) return UNDEFINED;
101 else if ( ! CL->forbiden_pair_list) return 1;
102 else if ( CL->forbiden_pair_list[s1][s2][r1]==NULL)return 1;
103 else if ( CL->forbiden_pair_list[s1][s2][r1][r2]==1)return UNDEFINED;
104 else if ( CL->forbiden_pair_list[s1][s2][r1][r2]==0)return 1;
108 crash ("ERROR in function: sw_pair_is_defined\n");
115 int gotoh_pair_wise_sw (Alignment *A, int*ns, int **l_s,Constraint_list *CL)
117 /*******************************************************************************/
118 /* SMITH AND WATERMAN */
120 /* makes DP between the the ns[0] sequences and the ns[1] */
122 /* for MODE, see the function get_dp_cost */
123 /*******************************************************************************/
131 int c,s, e,eg, ch,g,h, maximise;
145 /*trace back variables */
151 FILE *long_trace=NULL;
152 TRACE_TYPE *buf_trace=NULL;
153 static TRACE_TYPE **trace;
156 int long_trace_flag=0;
158 /********Prepare penalties*******/
161 g=(CL->gop+(CL->moca)->moca_scale)*SCORE_K;
162 h=(CL->gep+(CL->moca)->moca_scale)*SCORE_K;
166 g=(CL->gop-CL->nomatch)*SCORE_K;
167 h=(CL->gep-CL->nomatch)*SCORE_K;
169 fprintf ( stderr, "\n%d %d", g, h);
170 maximise=CL->maximise;
171 /********************************/
172 /*CLEAN UP AFTER USE*/
177 if ( al)free_char (al,-1);
181 /*DO MEMORY ALLOCATION FOR SW DP*/
183 lenal[0]=strlen (A->seq_al[l_s[0][0]]);
184 lenal[1]=strlen (A->seq_al[l_s[1][0]]);
185 len= (( lenal[0]>lenal[1])?lenal[0]:lenal[1])+1;
186 buf_trace=vcalloc ( len, sizeof (TRACE_TYPE));
187 buffer=vcalloc ( 2*len, sizeof (char));
188 al=declare_char (2, 2*len);
190 char_buf= vcalloc (2*len, sizeof (char));
191 dd= vcalloc (len, sizeof (int));
192 cc= vcalloc (len, sizeof (int));
193 ddg=vcalloc (len, sizeof (int));
196 if ( len>=MAX_LEN_FOR_DP)
199 long_trace=vtmpfile();
203 dim=(trace==NULL)?0:read_size_int ( trace,sizeof (int*));
204 trace =realloc_int ( trace,dim,dim,len-dim, len-dim);
207 /*END OF MEMORY ALLOCATION*/
223 pos0=aln2pos_simple ( A,-1, ns, l_s);
233 tr=(long_trace_flag)?buf_trace:trace[0];
234 tr[0]=(TRACE_TYPE)UNDEFINED;
237 for ( j=1; j<=lenal[1]; j++)
241 tr[j]=(TRACE_TYPE)UNDEFINED;
243 if (long_trace_flag)fwrite (buf_trace, sizeof ( TRACE_TYPE),lenal[1]+1, long_trace);
247 for (i=1; i<=lenal[0];i++)
249 tr=(long_trace_flag)?buf_trace:trace[i];
253 tr[0]=(TRACE_TYPE)UNDEFINED;
255 for (eg=0,j=1; j<=lenal[1];j++)
258 sub=(CL->get_dp_cost) (A, pos0, ns[0], l_s[0], i-1, pos0, ns[1], l_s[1],j-1,CL);
260 /*get the best Insertion*/
261 if ( a_better_than_b ( e, c+g, maximise))
265 e=best_of_a_b (e, c+g, maximise)+h;
267 /*Get the best deletion*/
268 if ( a_better_than_b ( dd[j], cc[j]+g, maximise))
272 dd[j]=best_of_a_b( dd[j], cc[j]+g, maximise)+h;
274 /*Chose Substitution for tie breaking*/
275 if ( sub!=UNDEFINED)c=best_int(4,maximise,&fop, e, (s+sub), dd[j],0);
296 {tr[j]=(TRACE_TYPE)fop*eg;
299 {tr[j]=(TRACE_TYPE)fop*ddg[j];
302 {tr[j]=(TRACE_TYPE)0;
306 tr[j]=(TRACE_TYPE)UNDEFINED;
313 fwrite ( buf_trace, sizeof (TRACE_TYPE), lenal[1]+1, long_trace);
320 if (best_i==0 ||best_j==0 )
333 for ( c=0; c< 2; c++)
335 for ( a=0; a< ns[c]; a++)
337 aln[l_s[c][a]][0]='\0';
340 if ( long_trace_flag)fclose ( long_trace);
354 if ( i==0 || j==0)k=UNDEFINED;
358 else if ( j==0 && i==0)
364 fseek ( long_trace, sizeof (TRACE_TYPE)*((lenal[1]+1)*(i)+j),SEEK_SET);
365 fread ( &k, sizeof (TRACE_TYPE), 1, long_trace);
374 if ( k==UNDEFINED){i=j=0;}
389 else if (k==(TRACE_TYPE)UNDEFINED)
398 for ( a=0; a< k; a++)
426 invert_list_char ( al[0], LEN);
427 invert_list_char ( al[1], LEN);
429 if ( A->declared_len<=LEN)realloc_alignment ( A, 2*LEN);
435 for ( a=0; a<ns[c]; a++)
437 A->order[l_s[c][a]][2]=(c==0)?last_i:last_j;
438 A->order[l_s[c][a]][3]=(c==1)?best_i:best_j;
440 e=(c==0)?last_i:last_j;
443 A->order[l_s[c][a]][1]+=1-is_gap(aln[l_s[c][a]][d]);
448 for ( c=0; c< 2; c++)
450 for ( a=0; a< ns[c]; a++)
452 aln[l_s[c][a]]+=(c==0)?last_i:last_j;
454 for ( b=0; b< LEN; b++)
458 char_buf[b]=aln[l_s[c][a]][ch++];
463 aln[l_s[c][a]]-=(c==0)?last_i:last_j;
464 sprintf (aln[l_s[c][a]],"%s", char_buf);
478 if ( long_trace_flag)fclose (long_trace);
485 /*******************************************************************************/
486 /* AUTOMATIC GEP+SCALE PENALTY FOR SW */
491 /*******************************************************************************/
493 Alignment * add_seq2aln (Constraint_list *CL, Alignment *IN,Sequence *S)
503 int ste; /*sequence to extract, last one if they are packed*/
509 if (CL->packed_seq_lu){ste=S->nseq-1;}
514 IN=realloc_aln2(IN, 1, strlen (S->seq[ste])+1);
520 sprintf ( IN->seq_al[0], "%s", S->seq[ste]);
521 sprintf (IN->name[0], "%s_%d_1", S->name[ste],series);
525 IN->len_aln=strlen ( IN->seq_al[0]);
532 IN=realloc_aln2 ( IN, IN->nseq+1,MAX(strlen ( S->seq[ste])+1, IN->len_aln+1));
533 n_groups=vcalloc ( 2, sizeof (int));
534 group_list=declare_int (2,IN->nseq+1);
536 n_groups[0]=IN->nseq;
537 for ( a=0; a<IN->nseq; a++)group_list[0][a]=a;
540 group_list[1][0]=IN->nseq;
541 sprintf (IN->name[IN->nseq], "%s_%d_%d",S->name[ste],series,IN->nseq+1);
542 sprintf (IN->seq_al[IN->nseq], "%s",S->seq[ste]);
543 IN->order[IN->nseq][0]=ste;
544 IN->order[IN->nseq][1]=0;
548 pair_wise ( IN, n_groups, group_list,CL);
557 /*********************************COPYRIGHT NOTICE**********************************/
558 /*© Centro de Regulacio Genomica */
560 /*Cedric Notredame */
561 /*Tue Oct 27 10:12:26 WEST 2009. */
562 /*All rights reserved.*/
563 /*This file is part of T-COFFEE.*/
565 /* T-COFFEE is free software; you can redistribute it and/or modify*/
566 /* it under the terms of the GNU General Public License as published by*/
567 /* the Free Software Foundation; either version 2 of the License, or*/
568 /* (at your option) any later version.*/
570 /* T-COFFEE is distributed in the hope that it will be useful,*/
571 /* but WITHOUT ANY WARRANTY; without even the implied warranty of*/
572 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/
573 /* GNU General Public License for more details.*/
575 /* You should have received a copy of the GNU General Public License*/
576 /* along with Foobar; if not, write to the Free Software*/
577 /* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/
578 /*............................................... |*/
579 /* If you need some more information*/
580 /* cedric.notredame@europe.com*/
581 /*............................................... |*/
585 /*********************************COPYRIGHT NOTICE**********************************/