7 #include "io_lib_header.h"
8 #include "util_lib_header.h"
9 #include "define_header.h"
10 #include "dp_lib_header.h"
12 Alignment *clean_maln ( Alignment *A, Alignment *I, int T, int n_it)
16 int in_una, in_aln, in_gap, gap, una, aln;
17 int Sstart,Rstate, Sstate;
22 add_warning ( stderr, "\nWARNING: -clean_aln is not supported anymore [PROGRAM:%s]\n", PROGRAM);
29 in_una=a++;in_gap=a++;in_aln=a++; aln=a++;gap=a++;una=a++;
30 segment_list=declare_int ( A->len_aln*A->nseq, 3);
33 /*1: Identify the segments*/
35 for ( a=0; a< A->nseq; a++)
38 for ( b=0; b<A->len_aln; b++)
40 if (is_gap(A->seq_al[a][b]))Rstate=gap;
41 else if ( I->seq_al[a][b]<=T){Rstate=una;}
42 else if ( I->seq_al[a][b]==NO_COLOR_RESIDUE)Rstate=una;
45 if (Rstate==una)C->seq_al[a][b]='-';
53 else if ( Rstate==una)
58 else if ( Rstate==aln)
61 else if ( Sstate==in_gap)
64 else if ( Rstate==una)Sstate=in_una;
65 else if ( Rstate==aln)Sstate=in_aln;
67 else if ( Sstate==in_una)
70 else if ( Rstate==una);
71 else if ( Rstate==aln)
73 segment_list[n_segment][0]=a;
74 segment_list[n_segment][1]=Sstart;
75 segment_list[n_segment][2]=b-Sstart;
83 segment_list[n_segment][0]=a;
84 segment_list[n_segment][1]=Sstart;
85 segment_list[n_segment][2]=b-Sstart;
91 /*2 Realign the segments*/
93 for ( b=0; b< n_it; b++)
95 for ( a=0; a< n_segment; a++)
98 A=realign_segment ( segment_list[a][0], segment_list[a][1], segment_list[a][2], A, C);
103 free_int ( segment_list, -1);
104 make_fast_generic_dp_pair_wise (NULL, NULL, NULL, NULL);
108 Alignment *realign_segment (int seq, int start, int len,Alignment *A, Alignment *C)
110 Alignment *S1=NULL, *S2=NULL, *S3=NULL;
113 static Constraint_list *CL;
116 /*1 Prepare the Constraint list*/
119 CL=vcalloc ( 1, sizeof (Constraint_list));
121 CL->pw_parameters_set=1;
122 CL->M=read_matrice ("blosum62mt");
125 CL->evaluate_residue_pair=evaluate_matrix_score;
126 sprintf ( CL->dp_mode, "myers_miller_pair_wise");
130 S1=extract_aln (S1,0,start);
132 S2=extract_aln (S2, start, start+len);
134 S3=extract_aln (S3, start+len,A->len_aln);
137 /*for (a=0; a<S2->nseq; a++){S2->order[a][1]=0;S2->order[a][0]=a;}*/
140 ungap ( S2->seq_al[seq]);
141 CL->S=A->S;/*aln2seq(S2);*/
142 /*3 Prepare Sequence Presentation*/
143 ns=vcalloc (2, sizeof (int));
144 ls=declare_int (2,S2->nseq);
147 for ( a=0,b=0; a< S2->nseq; a++)if (a!=seq)ls[0][b++]=a;
151 pair_wise (S2, ns, ls, CL);
153 A=realloc_aln (A, strlen (S1->seq_al[0])+ strlen (S2->seq_al[0])+ strlen (S3->seq_al[0])+1);
154 for ( a=0; a< A->nseq; a++)
156 sprintf ( A->seq_al[a], "%s%s%s", S1->seq_al[a], S2->seq_al[a], S3->seq_al[a]);
162 vfree(ns);free_int(ls, -1);
166 Alignment *realign_segment_old (int seq, int start, int len,Alignment *A, Alignment *C)
171 static Dp_Model *M=NULL;
172 static Constraint_list *CL=NULL;
177 /*1 Prepare the Constraint list*/
180 CL=vcalloc ( 1, sizeof (Constraint_list));
182 CL->pw_parameters_set=1;
183 CL->M=read_matrice ("blosum62mt");
186 CL->evaluate_residue_pair=evaluate_matrix_score;
190 S=extract_aln (S, start, start+len);
191 S->len_aln=strlen(S->seq_al[0]);
192 sub_seq=extract_char (A->seq_al[seq], start, len);
196 sprintf ( S->seq_al[seq],"%s", sub_seq);
201 /*2 Prepare the Model*/
202 M=initialize_seg2prf_model((start==0)?2:0,(start+len==A->len_aln)?2:0,CL);
203 M->diag=vcalloc ( 2*len+1, sizeof (int));
204 M->diag[0]=len+strlen (sub_seq)-1;
205 for ( a=1; a<=M->diag[0]; a++)M->diag[a]=a;
207 /*3 Prepare Sequence Presentation*/
208 ns=vcalloc (2, sizeof (int));
209 ls=declare_int (2,A->nseq);
212 for ( a=0,b=0; a< A->nseq; a++)if (a!=seq)ls[0][b++]=a;
216 if ( strlen (sub_seq)!=len)
220 R=make_fast_generic_dp_pair_wise(S, ns, ls, M);
222 for (c=0, b=1,a=start; a< start+len; b++,a++)
224 if (R->traceback[b]==0)
226 A->seq_al[seq][a]=sub_seq[c];
227 C->seq_al[seq][a]=sub_seq[c];
232 A->seq_al[seq][a]='-';
233 C->seq_al[seq][a]='-';
244 free_sequence (CL->S, (CL->S)->nseq);
250 Dp_Model * initialize_seg2prf_model(int left_tg_mode, int right_tg_mode, Constraint_list *CL)
256 M=vcalloc ( 1, sizeof (Dp_Model));
258 M->START=M->nstate++;
263 M->gop=CL->gop*SCORE_K;
264 M->gep=CL->gep*SCORE_K;
266 M->bounded_model=declare_int (M->nstate+1, M->nstate+1);
267 M->model=declare_int (M->nstate+1, M->nstate+1);
268 for ( a=0; a<=M->nstate; a++)
269 for ( b=0; b<= M->nstate; b++)
270 M->model[a][b]=UNDEFINED;
273 M->TYPE=a++;M->LEN_I=a++; M->LEN_J=a++; M->DELTA_I=a++;M->DELTA_J=a++; M->CODING0=a++;M->DELETION=a++;
274 M->model_properties=declare_int ( M->nstate, 10);
277 M->EMISSION=a++;M->TERM_EMISSION=a++;M->START_EMISSION=a++;
278 M->model_emission_function=vcalloc(M->nstate, sizeof (int (**)(Alignment*, int **, int, int*, int, int **, int, int*, int, struct Constraint_list *)));
279 for ( a=0; a< M->nstate; a++)
280 M->model_emission_function[a]=vcalloc(3, sizeof (int (*)(Alignment*, int **, int, int*, int, int **, int, int*, int, struct Constraint_list *)));
284 M->model_properties[0][M->TYPE]=M->CODING0;
285 M->model_properties[0][M->LEN_I]=1;
286 M->model_properties[0][M->LEN_J]=1;
287 M->model_properties[0][M->DELTA_I]=-1;
288 M->model_properties[0][M->DELTA_J]= 0;
290 M->model_emission_function[0][M->EMISSION] =cw_profile_get_dp_cost;
291 M->model_emission_function[0][M->START_EMISSION]=get_start_gep_cost;
292 M->model_emission_function[0][M->TERM_EMISSION] =get_start_gep_cost;
295 M->model_properties[1][M->TYPE]=M->DELETION;
296 M->model_properties[1][M->LEN_I]=1;
297 M->model_properties[1][M->LEN_J]=0;
298 M->model_properties[1][M->DELTA_I]=-1;
299 M->model_properties[1][M->DELTA_J]=+1;
300 M->model_emission_function[1][M->EMISSION]=get_gep_cost;
302 if (left_tg_mode ==2)
303 M->model_emission_function[1][M->START_EMISSION]=get_start_gep_cost;
304 else M->model_emission_function[1][M->START_EMISSION]=get_gep_cost;
306 if (right_tg_mode ==2)
307 M->model_emission_function[1][M->TERM_EMISSION]=get_term_gep_cost;
308 else M->model_emission_function[1][M->TERM_EMISSION]=get_gep_cost;
311 M->model[0][M->END]=M->model[M->START][0]=ALLOWED;
312 M->model[0][1]=M->gop;
313 M->model[0][0]=ALLOWED;
315 M->model[1][M->END]= (right_tg_mode==0)?0:-M->gop;
316 M->model[M->START][1]=( left_tg_mode==0)?M->gop:0;
317 M->model[1][1]=ALLOWED;
318 M->model[1][0]=ALLOWED;
325 for (c=0,a=0, d=0; a< M->START; a++)
326 for ( b=0; b<M->START; b++, d++)
328 if (M->model[a][b]!=UNDEFINED)
330 M->bounded_model[b][1+M->bounded_model[b][0]++]=a;
339 int get_gep_cost (Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL)
341 return CL->gep*SCORE_K;
344 int get_start_gep_cost (Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL)
348 int get_term_gep_cost (Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL)
350 return CL->gep*SCORE_K*-1;
356 /******************************COPYRIGHT NOTICE*******************************/
357 /*© Centro de Regulacio Genomica */
359 /*Cedric Notredame */
360 /*Fri Feb 18 08:27:45 CET 2011 - Revision 596. */
361 /*All rights reserved.*/
362 /*This file is part of T-COFFEE.*/
364 /* T-COFFEE is free software; you can redistribute it and/or modify*/
365 /* it under the terms of the GNU General Public License as published by*/
366 /* the Free Software Foundation; either version 2 of the License, or*/
367 /* (at your option) any later version.*/
369 /* T-COFFEE is distributed in the hope that it will be useful,*/
370 /* but WITHOUT ANY WARRANTY; without even the implied warranty of*/
371 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/
372 /* GNU General Public License for more details.*/
374 /* You should have received a copy of the GNU General Public License*/
375 /* along with Foobar; if not, write to the Free Software*/
376 /* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/
377 /*............................................... |*/
378 /* If you need some more information*/
379 /* cedric.notredame@europe.com*/
380 /*............................................... |*/
384 /******************************COPYRIGHT NOTICE*******************************/