7 #include "io_lib_header.h"
8 #include "util_lib_header.h"
9 #include "define_header.h"
11 #include "dp_lib_header.h"
17 int evaluate_moca_domain ( Alignment *A, Constraint_list *CL)
20 function documentation: start
21 int evaluate_moca_domain ( Alignment *A, Constraint_list *CL)
23 This function evaluates a multiple local alignment
24 If the alignmnent is to be accepted, return score
27 function documentation: end
41 sprintf ( alp, "acefghiklmnpqrstuvwy");
45 score=(int)(output_maln_pval ( "/dev/null", A)*-100);
54 while ((end+1)!=A->len_aln)
56 end=get_nol_aln_border (A,start,GO_RIGHT);
57 if ( end==start)break;
58 fprintf ( stderr, "\n**%d %d (%d)",start, end, A->len_aln);
60 B=extract_aln (B,start,end);
61 for (a=0; a<B->nseq; a++)
62 for ( b=0; b<B->len_aln; b++)
63 if ( is_gap (B->seq_al[a][b]))B->seq_al[a][b]=alp[(int)rand()%(strlen (alp))];
67 fprintf ( stderr, "==>%d",(int)(output_maln_pval ( "/dev/null", B)*-100) );
68 if ( score==UNDEFINED)score=(int)(output_maln_pval ( "/dev/null", B)*-100);
70 score=MAX(score,(int)(output_maln_pval ( "/dev/null", B)*-100));
79 int moca_slow_get_dp_cost ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL)
83 s=slow_get_dp_cost ( A, pos1, ns1, list1, col1, pos2, ns2, list2, col2, CL);
86 if ( s==UNDEFINED)return UNDEFINED;
87 else return s+(CL->moca)->moca_scale;
90 int moca_evaluate_matrix_score ( Constraint_list *CL, int s1, int r1, int s2, int r2)
93 function documentation: start
94 int moca_residue_pair_extended_list ( Constraint_list *CL, int s1, int r1, int s2, int r2 )
96 THIS FUNCTION RETURNS THE EXTENDED SCORE OF A PAIR OF RESIDUES
97 it is meant to work with local aln pair_wise routines, by using (CL->moca)->forbiden_residues
98 a constant value is substracted from the extended score.
100 This function is meant toi be used with omain_dp, therefore, it allows the match of identical residues.
102 function documentation: end
105 if (unpack_seq_residues ( &s1, &r1, &s2, &r2, CL->packed_seq_lu)==UNDEFINED)return UNDEFINED;
106 else if ( (CL->moca)->forbiden_residues && ((CL->moca)->forbiden_residues[s1][r1]==UNDEFINED ||(CL->moca)->forbiden_residues[s2][r2]==UNDEFINED))return UNDEFINED;
107 else if ( s1==s2 && r1 == r2) return UNDEFINED;
108 else return evaluate_matrix_score(CL, s1, r1, s2, r2);
114 int moca_residue_pair_extended_list ( Constraint_list *CL, int s1, int r1, int s2, int r2 )
117 function documentation: start
118 int moca_residue_pair_extended_list ( Constraint_list *CL, int s1, int r1, int s2, int r2 )
120 THIS FUNCTION RETURNS THE EXTENDED SCORE OF A PAIR OF RESIDUES
121 it is meant to work with local aln pair_wise routines, by using (CL->moca)->forbiden_residues
122 a constant value is substracted from the extended score.
124 This function is meant toi be used with omain_dp, therefore, it allows the match of identical residues.
126 function documentation: end
129 if (unpack_seq_residues ( &s1, &r1, &s2, &r2, CL->packed_seq_lu)==UNDEFINED)return UNDEFINED;
130 else if ( (CL->moca)->forbiden_residues && ((CL->moca)->forbiden_residues[s1][r1]==UNDEFINED ||(CL->moca)->forbiden_residues[s2][r2]==UNDEFINED))return UNDEFINED;
131 else if ( s1==s2 && r1 == r2) return UNDEFINED;
132 else return residue_pair_extended_list (CL, s1, r1, s2, r2);
137 int **cache_cl_with_moca_domain (Alignment *A, Constraint_list *CL)
140 function documentation: start
141 int **cache_cl_with_moca_domain (Alignment *A, Constraint_list *CL)
143 Read a multiple alignmnent
144 Given all the residues (CL->S)->seq[x][y] contained in the maln
145 Set (CL->moca)->forbiden_residues[x][y] to UNDEFINED
146 return (CL->moca)->forbiden_residues
149 You must make sure that the evalation strategy uses (CL->moca)->forbiden_residues
150 (CL->moca)->forbiden_residues[0][1]->first residue(1) of First sequence(0)
151 function documentation: end
157 pos=aln2pos_simple(A, A->nseq);
159 if ( !(CL->moca)->forbiden_residues)(CL->moca)->forbiden_residues=declare_int ((CL->S)->nseq, strlen ((CL->S)->seq[0])+1);
161 for ( a=0; a<A->nseq;a++)
162 for ( b=0; b< A->len_aln; b++)
163 (CL->moca)->forbiden_residues[A->order[a][0]][pos[a][b]]=UNDEFINED;
166 return (CL->moca)->forbiden_residues;
168 Alignment *make_moca_nol_aln ( Alignment *A, Constraint_list *CL)
174 /*********************************************************************************************/
176 /* DOMAIN Z SCORE EVALUATION */
178 /*********************************************************************************************/
180 int evaluate_domain_aln_z_score (Alignment *A, int start, int end,Constraint_list *CL, char *alphabet)
184 double score, ref_score;
186 double sum=0, sum2=0;
189 if ( A==NULL || A->nseq==0 || A->len_aln==0)return 0;
190 ref_score=(double)evaluate_domain_aln (A,start, end,CL);
191 for (sum=0, sum2=0,a=0;a<N_EVAL; a++)
193 B=make_random_aln ( B, A->nseq, end-start, alphabet);
194 score=(double)evaluate_domain_aln (B,0,B->len_aln,CL);
198 score=(return_z_score(ref_score, sum, sum2, N_EVAL)*100)/A->len_aln;
203 int evaluate_domain_aln ( Alignment *A, int start, int end,Constraint_list *CL)
210 function documentation: start
212 This function uses a pam250 to evaluate the sum of pairs score of A,
213 between position start(included) to position end (exluded),
215 the numbering starts 0
216 function documentation: end
219 if ( !mat)mat=read_matrice ( "pam250mt");
221 for ( c=start, score=0; c<end; c++)
223 for ( a=0; a< A->nseq-1; a++)
224 for ( b=a+1; b< A->nseq; b++)
226 c1=tolower(A->seq_al[a][c]);
227 c2=tolower(A->seq_al[b][c]);
229 if ( !is_gap (c1) && !is_gap(c2))score+=mat[c1-'A'][c2-'A'];
235 int unpack_seq_residues ( int *s1, int *r1, int *s2, int *r2, int **packed_seq_lu)
237 /* Given a series of sequences concatenated (packed), and the coordinates of two residues
238 This function translates the coordinates into the real ones and allows evaluation
239 Note for this function residues go from [1->N], sequences from [0->N[
240 This is true for in and out comming residues number
241 NOTE: The sequence cannot be guessed when the residues r1 or r2 are GAPS, therefore UNDEFINED is returned
242 NOTE: Concatenated sequences are separated with X, such residues cause an UNDEFINED to be returned
245 if ( packed_seq_lu==NULL)return 1;
246 else if ( s1[0]!=s2[0])return 1;
247 else if ( r1[0]<=0 || r2[0]<=0)return UNDEFINED;
248 else if ( packed_seq_lu[r1[0]][0]==UNDEFINED || packed_seq_lu[r2[0]][0]==UNDEFINED)return UNDEFINED;
251 s1[0]=packed_seq_lu[r1[0]][0];
252 r1[0]=packed_seq_lu[r1[0]][1];
254 s2[0]=packed_seq_lu[r2[0]][0];
255 r2[0]=packed_seq_lu[r2[0]][1];
260 Alignment * unpack_seq_aln ( Alignment *A,Constraint_list *CL)
262 int a, b, r_seq, r_start, r_len;
265 if (!CL->packed_seq_lu) return A;
267 for (a=0; a< A->nseq; a++)
269 r_seq =CL->packed_seq_lu[A->order[a][1]+1][0];
270 r_start=CL->packed_seq_lu[A->order[a][1]+1][1];
272 A->order[a][0]=r_seq;
273 A->order[a][1]=r_start-1;
275 for ( r_len=0,b=0; b< A->len_aln; b++)r_len+=!is_gap(A->seq_al[a][b]);
276 sprintf ( A->name[a],"%s_%d_%d", (A->S)->name[r_seq], r_start, r_start+r_len-1);
283 /******************************COPYRIGHT NOTICE*******************************/
284 /*© Centro de Regulacio Genomica */
286 /*Cedric Notredame */
287 /*Fri Feb 18 08:27:45 CET 2011 - Revision 596. */
288 /*All rights reserved.*/
289 /*This file is part of T-COFFEE.*/
291 /* T-COFFEE is free software; you can redistribute it and/or modify*/
292 /* it under the terms of the GNU General Public License as published by*/
293 /* the Free Software Foundation; either version 2 of the License, or*/
294 /* (at your option) any later version.*/
296 /* T-COFFEE is distributed in the hope that it will be useful,*/
297 /* but WITHOUT ANY WARRANTY; without even the implied warranty of*/
298 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/
299 /* GNU General Public License for more details.*/
301 /* You should have received a copy of the GNU General Public License*/
302 /* along with Foobar; if not, write to the Free Software*/
303 /* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/
304 /*............................................... |*/
305 /* If you need some more information*/
306 /* cedric.notredame@europe.com*/
307 /*............................................... |*/
311 /******************************COPYRIGHT NOTICE*******************************/