1 /******************************COPYRIGHT NOTICE*******************************/
2 /* (c) Centro de Regulacio Genomica */
5 /* 12 Aug 2014 - 22:07. */
6 /*All rights reserved. */
7 /*This file is part of T-COFFEE. */
9 /* T-COFFEE is free software; you can redistribute it and/or modify */
10 /* it under the terms of the GNU General Public License as published by */
11 /* the Free Software Foundation; either version 2 of the License, or */
12 /* (at your option) any later version. */
14 /* T-COFFEE is distributed in the hope that it will be useful, */
15 /* but WITHOUT ANY WARRANTY; without even the implied warranty of */
16 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
17 /* GNU General Public License for more details. */
19 /* You should have received a copy of the GNU General Public License */
20 /* along with Foobar; if not, write to the Free Software */
21 /* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
22 /*............................................... */
23 /* If you need some more information */
24 /* cedric.notredame@europe.com */
25 /*............................................... */
26 /******************************COPYRIGHT NOTICE*******************************/
33 #include "io_lib_header.h"
34 #include "util_lib_header.h"
35 #include "define_header.h"
37 #include "dp_lib_header.h"
39 int gotoh_pair_wise_lalign ( Alignment *A, int*ns, int **l_s,Constraint_list *CL)
45 BUF=copy_aln (A, BUF);
48 for ( a=0; a<CL->lalign_n_top; a++)
54 A->score_aln=gotoh_pair_wise_sw (A, ns, l_s, CL);
55 EA=fast_coffee_evaluate_output (A, CL);
57 output_format_aln (CL->out_aln_format[0],A,EA,"stdout");
58 CL=undefine_sw_aln ( A, CL);
63 Constraint_list * undefine_sw_aln ( Alignment *A, Constraint_list *CL)
72 pos=aln2pos_simple ( A,A->nseq);
74 for ( l=0; l< A->len_aln; l++)
75 for ( a=0; a< A->nseq-1; a++)
81 for ( b=a+1; b< A->nseq;b++)
87 CL=undefine_sw_pair ( CL, rs1, r1, rs2, r2);
93 Constraint_list * undefine_sw_pair ( Constraint_list *CL, int s1, int r1, int s2, int r2)
97 if ( !CL->forbiden_pair_list)
100 CL->forbiden_pair_list=(int ****)vcalloc ( (CL->S)->nseq, sizeof (int ***));
101 for ( a=0; a< ((CL->S)->nseq); a++)
103 CL->forbiden_pair_list[a]=(int ***)vcalloc ( (CL->S)->nseq, sizeof (int **));
104 for ( b=0; b< ((CL->S)->nseq); b++)
105 CL->forbiden_pair_list[a][b]=(int **)vcalloc ( (CL->S)->len[a]+1, sizeof (int *));
109 if ( CL->forbiden_pair_list[s1][s2][r1]==NULL)CL->forbiden_pair_list[s1][s2][r1]=(int *)vcalloc ( (CL->S)->len[s2]+1, sizeof (int));
110 CL->forbiden_pair_list[s1][s2][r1][r2]=1;
112 if ( CL->forbiden_pair_list[s2][s1][r2]==NULL)CL->forbiden_pair_list[s2][s1][r2]=(int *)vcalloc ( (CL->S)->len[s1]+1, sizeof (int));
113 CL->forbiden_pair_list[s2][s1][r2][r1]=1;
118 int sw_pair_is_defined ( Constraint_list *CL, int s1, int r1, int s2, int r2)
126 if ( s1==s2 && d<(CL->sw_min_dist)) return UNDEFINED;
127 else if ( ! CL->forbiden_pair_list) return 1;
128 else if ( CL->forbiden_pair_list[s1][s2][r1]==NULL)return 1;
129 else if ( CL->forbiden_pair_list[s1][s2][r1][r2]==1)return UNDEFINED;
130 else if ( CL->forbiden_pair_list[s1][s2][r1][r2]==0)return 1;
134 crash ("ERROR in function: sw_pair_is_defined\n");
141 int gotoh_pair_wise_sw (Alignment *A, int*ns, int **l_s,Constraint_list *CL)
143 /*******************************************************************************/
144 /* SMITH AND WATERMAN */
146 /* makes DP between the the ns[0] sequences and the ns[1] */
148 /* for MODE, see the function get_dp_cost */
149 /*******************************************************************************/
157 int c,s, e,eg, ch,g,h, maximise;
171 /*trace back variables */
177 FILE *long_trace=NULL;
178 TRACE_TYPE *buf_trace=NULL;
179 static TRACE_TYPE **trace;
182 int long_trace_flag=0;
184 /********Prepare penalties*******/
187 g=(CL->gop+(CL->moca)->moca_scale)*SCORE_K;
188 h=(CL->gep+(CL->moca)->moca_scale)*SCORE_K;
192 g=(CL->gop-CL->nomatch)*SCORE_K;
193 h=(CL->gep-CL->nomatch)*SCORE_K;
195 fprintf ( stderr, "\n%d %d", g, h);
196 maximise=CL->maximise;
197 /********************************/
198 /*CLEAN UP AFTER USE*/
203 if ( al)free_char (al,-1);
207 /*DO MEMORY ALLOCATION FOR SW DP*/
209 lenal[0]=strlen (A->seq_al[l_s[0][0]]);
210 lenal[1]=strlen (A->seq_al[l_s[1][0]]);
211 len= (( lenal[0]>lenal[1])?lenal[0]:lenal[1])+1;
212 buf_trace=(int*)vcalloc ( len, sizeof (TRACE_TYPE));
213 buffer=(char*)vcalloc ( 2*len, sizeof (char));
214 al=declare_char (2, 2*len);
216 char_buf=(char*) vcalloc (2*len, sizeof (char));
217 dd= (int*) vcalloc (len, sizeof (int));
218 cc=(int*) vcalloc (len, sizeof (int));
219 ddg=(int*)vcalloc (len, sizeof (int));
222 if ( len>=MAX_LEN_FOR_DP)
225 long_trace=vtmpfile();
229 dim=(trace==NULL)?0:read_size_int ( trace,sizeof (int*));
230 trace =realloc_int ( trace,dim,dim,len-dim, len-dim);
233 /*END OF MEMORY ALLOCATION*/
249 pos0=aln2pos_simple ( A,-1, ns, l_s);
259 tr=(long_trace_flag)?buf_trace:trace[0];
260 tr[0]=(TRACE_TYPE)UNDEFINED;
263 for ( j=1; j<=lenal[1]; j++)
267 tr[j]=(TRACE_TYPE)UNDEFINED;
269 if (long_trace_flag)fwrite (buf_trace, sizeof ( TRACE_TYPE),lenal[1]+1, long_trace);
273 for (i=1; i<=lenal[0];i++)
275 tr=(long_trace_flag)?buf_trace:trace[i];
279 tr[0]=(TRACE_TYPE)UNDEFINED;
281 for (eg=0,j=1; j<=lenal[1];j++)
284 sub=(CL->get_dp_cost) (A, pos0, ns[0], l_s[0], i-1, pos0, ns[1], l_s[1],j-1,CL);
286 /*get the best Insertion*/
287 if ( a_better_than_b ( e, c+g, maximise))
291 e=best_of_a_b (e, c+g, maximise)+h;
293 /*Get the best deletion*/
294 if ( a_better_than_b ( dd[j], cc[j]+g, maximise))
298 dd[j]=best_of_a_b( dd[j], cc[j]+g, maximise)+h;
300 /*Chose Substitution for tie breaking*/
301 if ( sub!=UNDEFINED)c=best_int(4,maximise,&fop, e, (s+sub), dd[j],0);
322 {tr[j]=(TRACE_TYPE)fop*eg;
325 {tr[j]=(TRACE_TYPE)fop*ddg[j];
328 {tr[j]=(TRACE_TYPE)0;
332 tr[j]=(TRACE_TYPE)UNDEFINED;
339 fwrite ( buf_trace, sizeof (TRACE_TYPE), lenal[1]+1, long_trace);
346 if (best_i==0 ||best_j==0 )
359 for ( c=0; c< 2; c++)
361 for ( a=0; a< ns[c]; a++)
363 aln[l_s[c][a]][0]='\0';
366 if ( long_trace_flag)fclose ( long_trace);
380 if ( i==0 || j==0)k=UNDEFINED;
384 else if ( j==0 && i==0)
390 fseek ( long_trace, sizeof (TRACE_TYPE)*((lenal[1]+1)*(i)+j),SEEK_SET);
391 fread ( &k, sizeof (TRACE_TYPE), 1, long_trace);
400 if ( k==UNDEFINED){i=j=0;}
415 else if (k==(TRACE_TYPE)UNDEFINED)
424 for ( a=0; a< k; a++)
452 invert_list_char ( al[0], LEN);
453 invert_list_char ( al[1], LEN);
455 if ( A->declared_len<=LEN)realloc_alignment ( A, 2*LEN);
461 for ( a=0; a<ns[c]; a++)
463 A->order[l_s[c][a]][2]=(c==0)?last_i:last_j;
464 A->order[l_s[c][a]][3]=(c==1)?best_i:best_j;
466 e=(c==0)?last_i:last_j;
469 A->order[l_s[c][a]][1]+=1-is_gap(aln[l_s[c][a]][d]);
474 for ( c=0; c< 2; c++)
476 for ( a=0; a< ns[c]; a++)
478 aln[l_s[c][a]]+=(c==0)?last_i:last_j;
480 for ( b=0; b< LEN; b++)
484 char_buf[b]=aln[l_s[c][a]][ch++];
489 aln[l_s[c][a]]-=(c==0)?last_i:last_j;
490 sprintf (aln[l_s[c][a]],"%s", char_buf);
504 if ( long_trace_flag)fclose (long_trace);
511 /*******************************************************************************/
512 /* AUTOMATIC GEP+SCALE PENALTY FOR SW */
517 /*******************************************************************************/
519 Alignment * add_seq2aln (Constraint_list *CL, Alignment *IN,Sequence *S)
529 int ste; /*sequence to extract, last one if they are packed*/
535 if (CL->packed_seq_lu){ste=S->nseq-1;}
540 IN=realloc_aln2(IN, 1, strlen (S->seq[ste])+1);
546 sprintf ( IN->seq_al[0], "%s", S->seq[ste]);
547 sprintf (IN->name[0], "%s_%d_1", S->name[ste],series);
551 IN->len_aln=strlen ( IN->seq_al[0]);
558 IN=realloc_aln2 ( IN, IN->nseq+1,MAX(strlen ( S->seq[ste])+1, IN->len_aln+1));
559 n_groups=(int*)vcalloc ( 2, sizeof (int));
560 group_list=declare_int (2,IN->nseq+1);
562 n_groups[0]=IN->nseq;
563 for ( a=0; a<IN->nseq; a++)group_list[0][a]=a;
566 group_list[1][0]=IN->nseq;
567 sprintf (IN->name[IN->nseq], "%s_%d_%d",S->name[ste],series,IN->nseq+1);
568 sprintf (IN->seq_al[IN->nseq], "%s",S->seq[ste]);
569 IN->order[IN->nseq][0]=ste;
570 IN->order[IN->nseq][1]=0;
574 pair_wise ( IN, n_groups, group_list,CL);