7 #include "io_lib_header.h"
8 #include "util_lib_header.h"
9 #include "define_header.h"
11 #include "dp_lib_header.h"
12 float compute_lambda (int **matrix,char *alphabet);
13 /*********************************************************************************************/
15 /* FUNCTIONS FOR EVALUATING THE CONSISTENCY BETWEEN ALN AND CL */
17 /*********************************************************************************************/
19 /*Fast: score= extended_res/max_extended_residue_for the whole aln
20 slow: score= extended_res/sum all extended score for that residue
21 non_extended score= non_ext /sum all non extended score for that residue
22 heuristic score= extended /sum of extended score of all pairs in the library
23 (i.e. Not ALL the possible pairs)
25 Alignment * main_coffee_evaluate_output2 ( Alignment *IN,Constraint_list *CL, const char *mode );
26 int sub_aln2ecl_raw_score (Alignment *A, Constraint_list *CL, int ns, int *ls)
29 int p1,r1, r2, s1, s2;
33 A=reorder_aln (A, (CL->S)->name,(CL->S)->nseq);
34 pos=aln2pos_simple ( A,A->nseq);
36 CL=index_res_constraint_list (CL, CL->weight_field);
38 for (p1=0; p1<A->len_aln; p1++)
40 for (s1=0; s1<ns-1; s1++)
42 for (s2=s1+1; s2<ns; s2++)
49 score+= residue_pair_extended_list_pc (CL,ls[s1], r1, ls[s2], r2)*SCORE_K;
56 return (score/(((ns*ns)-ns)/2))/A->len_aln;
58 int aln2ecl_raw_score (Alignment *A, Constraint_list *CL)
61 int p1,r1, r2, s1, s2;
64 A=reorder_aln (A, (CL->S)->name,(CL->S)->nseq);
65 pos=aln2pos_simple ( A,A->nseq);
67 CL=index_res_constraint_list (CL, CL->weight_field);
69 for (p1=0; p1<A->len_aln; p1++)
71 for (s1=0; s1<A->nseq-1; s1++)
73 for (s2=s1+1; s2<A->nseq; s2++)
78 if (r1>0 && r2>0)score+= residue_pair_extended_list_pc (CL,s1, r1, s2, r2);
84 return (score/(((A->nseq*A->nseq)-A->nseq)/2))/A->len_aln;
86 int node2sub_aln_score (Alignment *A,Constraint_list *CL, char *mode, NT_node T)
88 if ( !T || !T->right ||!T->left)return 0;
93 ns=vcalloc (2, sizeof (int));
94 ls=vcalloc (2, sizeof (int*));
95 ns[0]= (T->left)->nseq;
96 ns[1]=(T->right)->nseq;
97 ls[0]= (T->left)->lseq;
98 ls[1]=(T->right)->lseq;
100 return sub_aln2sub_aln_score (A, CL, mode, ns, ls);
104 int sub_aln2sub_aln_score ( Alignment *A,Constraint_list *CL, const char *mode, int *ns, int **ls)
106 /*Warning: Does Not Take Gaps into account*/
112 /*Make sure evaluation functions update their cache if needed*/
113 A=update_aln_random_tag (A);
114 pos=aln2pos_simple ( A, -1, ns, ls);
115 for (a=0; a< A->len_aln; a++)
116 score+=CL->get_dp_cost (A, pos, ns[0], ls[0], a, pos, ns[1],ls[1], a, CL);
119 score=(int)(((float)score)/(A->len_aln*SCORE_K));
120 score=(int)(CL->L && CL->normalise)?((score*MAXID)/(CL->normalise)):(score);
123 int sub_aln2sub_aln_raw_score ( Alignment *A,Constraint_list *CL, const char *mode, int *ns, int **ls)
125 /*Warning: Does Not Take Gaps into account*/
131 /*Make sure evaluation functions update their cache if needed*/
132 A=update_aln_random_tag (A);
133 pos=aln2pos_simple ( A, -1, ns, ls);
134 for (a=0; a< A->len_aln; a++)
135 score+=CL->get_dp_cost (A, pos, ns[0], ls[0], a, pos, ns[1],ls[1], a, CL);
140 Alignment* main_coffee_evaluate_output_sub_aln ( Alignment *A,Constraint_list *CL, const char *mode, int *n_s, int **l_s)
142 Alignment *SUB1, *SUB2, *SUB3;
143 int a, b, c,*list_seq;
146 if (strm ( CL->evaluate_mode, "no"))return NULL;
149 list_seq=vcalloc (n_s[0]+n_s[1], sizeof (int));
150 for (b=0, a=0; a< 2; a++){for (c=0;c< n_s[a]; c++)list_seq[b++]=l_s[a][c];}
153 SUB1=copy_aln (A, NULL);
154 SUB2=extract_sub_aln (SUB1,n_s[0]+n_s[1],list_seq);
155 SUB3=main_coffee_evaluate_output (SUB2,CL,CL->evaluate_mode);
163 Alignment * overlay_alignment_evaluation ( Alignment *I, Alignment *O)
168 if ( !I || !O) return O;
169 if ( I->len_aln!=O->len_aln)printf_exit (EXIT_FAILURE, stderr, "ERROR: Incompatible alignments in overlay_alignment_evaluation");
171 buf=vcalloc ( MAX(I->len_aln, O->len_aln), sizeof (int));
173 for (a=0; a<O->nseq; a++)
175 if (!strm (I->name[a], O->name[a]))printf_exit (EXIT_FAILURE, stderr, "ERROR: Incompatible alignments in overlay_alignment_evaluation");
176 for (b=0; b<O->len_aln; b++)
179 if ( islower(r))O->seq_al[a][b]=0;
180 else if (r<=9 || (r>='0' && r<='9'))O->seq_al[a][b]=I->seq_al[a][b];
186 Alignment * main_coffee_evaluate_output ( Alignment *IN,Constraint_list *CL, const char *mode )
189 Alignment *TopS=NULL, *LastS=NULL, *CurrentS=NULL;
196 CurrentS= main_coffee_evaluate_output2(IN, CL, mode);
197 if (!TopS)LastS=TopS=CurrentS;
208 Alignment * main_coffee_evaluate_output2 ( Alignment *IN,Constraint_list *CL, const char *mode )
211 /*Make sure evaluation functions update their cache if needed*/
212 IN=update_aln_random_tag (IN);
214 if ( CL->evaluate_residue_pair==evaluate_matrix_score || CL->ne==0 ||strm ( mode , "categories") || strm ( mode , "matrix")|| strm(mode, "sar")|| strstr (mode, "boxshade") )
217 if ( strm ( mode , "categories")) return categories_evaluate_output (IN, CL);
218 else if ( strm ( mode , "matrix"))return matrix_evaluate_output (IN, CL);
219 else if ( strm ( mode, "sar"))return sar_evaluate_output (IN, CL);
220 else if ( strstr ( mode, "boxshade"))return boxshade_evaluate_output (IN, CL, atoi (strstr(mode, "_")+1));
222 else if ( CL->evaluate_residue_pair==evaluate_matrix_score) return matrix_evaluate_output (IN, CL);
223 else if ( CL->ne==0) return matrix_evaluate_output (IN, CL);
225 else if ( strm (mode, "no"))return NULL;
226 else if ( strm4 ( mode, "t_coffee_fast","tcoffee_fast","fast_tcoffee", "fast_t_coffee"))
228 return fast_coffee_evaluate_output ( IN,CL);
230 else if ( strm4 ( mode, "t_coffee_slow","tcoffee_slow","slow_tcoffee","slow_t_coffee" ))
232 return slow_coffee_evaluate_output ( IN,CL);
235 else if ( strm4 ( mode, "tcoffee_non_extended","t_coffee_non_extended","non_extended_tcoffee","non_extended_t_coffee"))
237 return non_extended_t_coffee_evaluate_output ( IN,CL);
239 else if ( strm5 ( mode, "tcoffee_heuristic","t_coffee_heuristic","heuristic_tcoffee","heuristic_t_coffee", "dali"))
241 return heuristic_coffee_evaluate_output ( IN,CL);
245 fprintf ( stderr, "\nUNKNOWN MODE FOR ALIGNMENT EVALUATION: *%s* [FATAL:%s]",mode, PROGRAM);
254 Alignment * coffee_evaluate_output ( Alignment *IN,Constraint_list *CL)
256 fprintf ( stderr, "\n[WARNING:%s]THE FUNCTION coffee_evaluate_output IS NOT ANYMORE SUPPORTED\n", PROGRAM);
257 fprintf ( stderr, "\n[WARNING]fast_coffee_evaluate_output WILL BE USED INSTEAD\n");
259 return fast_coffee_evaluate_output (IN,CL);
261 Alignment * matrix_evaluate_output ( Alignment *IN,Constraint_list *CL)
263 int a,b, c,r, s, r1, r2;
280 Residue x: sum of observed extended X.. /sum of possible X..
284 if ( !CL->M)CL->M=read_matrice ("blosum62mt");
286 OUT=copy_aln (IN, OUT);
289 tot_res=declare_double ( IN->nseq, IN->len_aln);
290 max_res=declare_double ( IN->nseq, IN->len_aln);
292 tot_seq=declare_double ( IN->nseq, 1);
293 max_seq=declare_double ( IN->nseq, 1);
294 tot_col=declare_double ( IN->len_aln,1);
295 max_col=declare_double ( IN->len_aln,1);
299 for (a=0; a< IN->len_aln; a++)
301 for ( b=0; b< IN->nseq; b++)
303 r1=tolower(IN->seq_al[b][a]);
304 if ( is_gap(r1))continue;
305 r= CL->M[r1-'A'][r1-'A'];
307 for ( c=0; c<IN->nseq; c++)
309 r2=tolower(IN->seq_al[c][a]);
310 if (b==c || is_gap (r2))continue;
312 s=CL->M[r2-'A'][r1-'A'];
331 for ( a=0; a< IN->nseq; a++)
333 if ( !max_seq[a][0])continue;
335 OUT->score_seq[a]=(tot_seq[a][0]*100)/max_seq[a][0];
336 for (b=0; b< IN->len_aln; b++)
339 if ( is_gap(r1) || !max_res[a][b])continue;
341 r1=(tot_res[a][b]*10)/max_res[a][b];
344 (OUT)->seq_al[a][b]=r1+'0';
348 for ( a=0; a< IN->len_aln; a++)
350 r1=(max_col[a][0]==0)?0:((tot_col[a][0]*10)/max_col[a][0]);
352 (OUT)->seq_al[OUT->nseq][a]=r1+'0';
354 sprintf ( OUT->name[IN->nseq], "cons");
355 if (max_aln)OUT->score_seq[OUT->nseq]=OUT->score_aln=(100*tot_aln)/max_aln;
358 free_double (tot_res,-1);
359 free_double (max_res,-1);
361 free_double (tot_seq,-1);
362 free_double (max_seq,-1);
367 Alignment * sar_evaluate_output ( Alignment *IN,Constraint_list *CL)
369 int a,b, c,r, s, r1, r2;
386 Residue x: sum of observed extended X.. /sum of possible X..
390 if ( !CL->M)CL->M=read_matrice ("blosum62mt");
392 OUT=copy_aln (IN, OUT);
395 tot_res=declare_double ( IN->nseq, IN->len_aln);
396 max_res=declare_double ( IN->nseq, IN->len_aln);
398 tot_seq=declare_double ( IN->nseq, 1);
399 max_seq=declare_double ( IN->nseq, 1);
400 tot_col=declare_double ( IN->len_aln,1);
401 max_col=declare_double ( IN->len_aln,1);
405 for (a=0; a< IN->len_aln; a++)
407 for (b=0; b< IN->nseq; b++)
409 r1=tolower(IN->seq_al[b][a]);
410 for (c=0; c<IN->nseq; c++)
412 r2=tolower(IN->seq_al[c][a]);
415 if ( is_gap(r1) && is_gap(r2))s=0;
436 for ( a=0; a< IN->nseq; a++)
438 if ( !max_seq[a][0])continue;
439 OUT->score_seq[a]=(max_seq[a][0]*100)/max_seq[a][0];
440 for (b=0; b< IN->len_aln; b++)
443 if ( is_gap(r1) || !max_res[a][b])continue;
444 r1=(tot_res[a][b]*10)/max_res[a][b];
447 (OUT)->seq_al[a][b]=r1+'0';
451 for ( a=0; a< IN->len_aln; a++)
453 r1=(max_col[a][0]==0)?0:((tot_col[a][0]*10)/max_col[a][0]);
455 (OUT)->seq_al[OUT->nseq][a]=r1+'0';
457 sprintf ( OUT->name[IN->nseq], "cons");
458 if (max_aln)OUT->score_aln=(100*tot_aln)/max_aln;
461 free_double (tot_res,-1);
462 free_double (max_res,-1);
464 free_double (tot_seq,-1);
465 free_double (max_seq,-1);
469 Alignment * boxshade_evaluate_output ( Alignment *IN,Constraint_list *CL, int T)
478 Residue x: sum of observed extended X.. /sum of possible X..
482 OUT=copy_aln (IN, OUT);
483 aa=declare_int (26, 2);
485 for ( a=0; a< OUT->len_aln; a++)
487 for ( b=0; b< 26; b++){aa[b][1]=0;aa[b][0]='a'+b;}
488 for ( b=0; b< OUT->nseq; b++)
490 r=tolower(OUT->seq_al[b][a]);
491 if ( !is_gap(r))aa[r-'a'][1]++;
493 sort_int ( aa, 2, 1, 0,25);
494 f=(aa[25][1]*100)/OUT->nseq;
499 bs=((f-T)*10)/(100-T);
502 if (bs==10)bs--;bs+='0';
503 for ( b=0; b< OUT->nseq; b++)
505 r=tolower(OUT->seq_al[b][a]);
506 if (r==br && bs>'1')OUT->seq_al[b][a]=bs;
508 OUT->seq_al[b][a]=bs;
511 sprintf ( OUT->name[IN->nseq], "cons");
516 Alignment * categories_evaluate_output ( Alignment *IN,Constraint_list *CL)
522 float score, nseq2, tot_aln;
525 Residue x: sum of observed extended X.. /sum of possible X..
527 OUT=copy_aln (IN, OUT);
528 aa=vcalloc ( 26, sizeof (int));
529 nseq2=IN->nseq*IN->nseq;
531 for (tot_aln=0, a=0; a< IN->len_aln; a++)
533 for (n=0,b=0; b< IN->nseq; b++)
540 aa[tolower(r)-'a']++;
545 for ( score=0,b=0; b< 26; b++){score+=aa[b]*aa[b];aa[b]=0;}
547 score=(n==0)?0:score/n;
551 (OUT)->seq_al[OUT->nseq][a]='0'+r;
553 OUT->score_aln=(tot_aln/OUT->len_aln)*100;
554 sprintf ( OUT->name[IN->nseq], "cons");
559 Alignment * categories_evaluate_output_old ( Alignment *IN,Constraint_list *CL)
565 float score, nseq2, tot_aln, min=0;
568 Residue x: sum of observed extended X.. /sum of possible X..
570 OUT=copy_aln (IN, OUT);
571 aa=vcalloc ( 26, sizeof (int));
572 nseq2=IN->nseq*IN->nseq;
574 for (tot_aln=0, a=0; a< IN->len_aln; a++)
576 for (ng=0,b=0; b< IN->nseq; b++)
583 aa[tolower(r)-'a']++;
586 for (nc=0, b=0; b<26; b++)
594 score=(2*min)/IN->nseq;
599 (OUT)->seq_al[OUT->nseq][a]='0'+r;
602 OUT->score_aln=(tot_aln/OUT->len_aln)*100;
603 sprintf ( OUT->name[IN->nseq], "cons");
608 Alignment * fast_coffee_evaluate_output ( Alignment *IN,Constraint_list *CL)
610 int a,b, c, m,res, s, s1, s2, r1, r2;
614 double score_col=0, score_aln=0, score_res=0;
615 double max_col, max_aln;
616 double *max_seq, *score_seq;
621 /*NORMALIZE: with the highest scoring pair found in the multiple alignment*/
624 if ( !CL->evaluate_residue_pair){fprintf ( stderr, "\nWARNING: CL->evaluate_residue_pair Not set\nSet to: extend_residue_pair\n");CL->evaluate_residue_pair= extend_residue_pair; }
626 OUT=copy_aln (IN, OUT);
627 pos=aln2pos_simple(IN, IN->nseq);
628 pos2=aln2defined_residues (IN, CL);
630 max_seq=vcalloc ( IN->nseq, sizeof (double));
631 score_seq=vcalloc ( IN->nseq, sizeof (double));
635 /*1: Identify the highest scoring pair within the alignment*/
637 for ( m=0, a=0; a< IN->len_aln; a++)
639 for ( b=0; b< IN->nseq; b++)
645 for ( c=0; c< IN->nseq; c++)
649 if ( s1==s2 && !CL->do_self)continue;
651 if ( s1< s2)s=(CL->evaluate_residue_pair)( CL, s1, r1, s2, r2);
652 else s=(CL->evaluate_residue_pair)( CL, s2, r2, s1, r1);
654 s=(s!=UNDEFINED)?s:0;
662 sprintf ( OUT->name[IN->nseq], "cons");
663 for ( max_aln=0,score_aln=0,a=0; a< IN->len_aln; a++)
665 OUT->seq_al[IN->nseq][a]=NO_COLOR_RESIDUE;
666 for ( local_nseq=0,b=0; b<IN->nseq; b++){local_nseq+=(pos[b][a]>0 && pos2[b][a])?1:0;}
667 local_m=m*(local_nseq-1);
669 for ( max_col=0, score_col=0,b=0; b< IN->nseq; b++)
671 OUT->seq_al[b][a]=NO_COLOR_RESIDUE;
675 if (r1<=0 || !pos2[b][a])
680 for ( score_res=0,c=0; c< IN->nseq; c++)
685 if ((s1==s2 && !CL->do_self) || r2<=0 || !pos2[c][a]){continue;}
690 if ( s1< s2)s=(CL->evaluate_residue_pair)( CL, s1, r1, s2, r2);
691 else s=(CL->evaluate_residue_pair)( CL, s2, r2, s1, r1);
692 s=(s!=UNDEFINED)?s:0;
700 res=(local_m==0)?NO_COLOR_RESIDUE:((score_res*10)/local_m);
701 (OUT)->seq_al[b][a]=(res==NO_COLOR_RESIDUE)?res:(MIN(res,9));
706 res=(max_col==0)?NO_COLOR_RESIDUE:((score_col*10)/max_col);
707 OUT->seq_al[IN->nseq][a]=(res==NO_COLOR_RESIDUE)?res:(MIN(res,9));
711 IN->score_aln=OUT->score_aln=(max_aln==0)?0:((score_aln*100)/max_aln);
712 for ( a=0; a< OUT->nseq; a++)
714 OUT->score_seq[a]=(max_seq[a]==0)?0:((score_seq[a]*100)/max_seq[a]);
727 Alignment * slow_coffee_evaluate_output ( Alignment *IN,Constraint_list *CL)
729 int a,b, c,res, s, s1, s2, r1, r2;
732 double max_score_r, score_r, max;
733 double score_col=0, score_aln=0;
734 double max_score_col, max_score_aln;
735 double *max_score_seq, *score_seq;
736 int ***res_extended_weight;
741 Residue x: sum of observed extended X.. /sum of possible X..
747 if ( !CL->evaluate_residue_pair){fprintf ( stderr, "\nWARNING: CL->evaluate_residue_pair Not set\nSet to: extend_residue_pair\n");CL->evaluate_residue_pair= extend_residue_pair; }
750 OUT=copy_aln (IN, OUT);
751 pos=aln2pos_simple(IN, IN->nseq);
752 pos2=aln2defined_residues (IN, CL);
754 max_score_seq=vcalloc ( IN->nseq, sizeof (double));
755 score_seq=vcalloc ( IN->nseq, sizeof (double));
756 res_extended_weight=declare_arrayN(3,sizeof(int), (CL->S)->nseq, (CL->S)->max_len+1, 2);
757 max=(CL->normalise)?(100*CL->normalise)*SCORE_K:100;
759 for (a=0; a< IN->len_aln; a++)
761 for ( b=0; b< IN->nseq-1; b++)
765 for ( c=b+1; c< IN->nseq; c++)
769 if ( s1==s2 && !CL->do_self)continue;
770 else if ( r1<=0 || r2<=0) continue;
773 s=(CL->evaluate_residue_pair)( CL, s1, r1, s2, r2);
774 res_extended_weight[s1][r1][0]+=s*100;
775 res_extended_weight[s2][r2][0]+=s*100;
776 res_extended_weight[s1][r1][1]+=max;
777 res_extended_weight[s2][r2][1]+=max;
784 sprintf ( OUT->name[IN->nseq], "cons");
785 for ( max_score_aln=0,score_aln=0,a=0; a< IN->len_aln; a++)
787 OUT->seq_al[IN->nseq][a]=NO_COLOR_RESIDUE;
788 for ( n_res_in_col=0,b=0; b<IN->nseq; b++){n_res_in_col+=(pos[b][a]>0 && pos2[b][a]>0)?1:0;}
789 for ( max_score_col=0, score_col=0,b=0; b< IN->nseq; b++)
791 OUT->seq_al[b][a]=NO_COLOR_RESIDUE;
794 if (r1<=0 || pos2[b][a]<1)continue;
797 max_score_r =res_extended_weight[s1][r1][1];
798 score_r =res_extended_weight[s1][r1][0];
799 if ( max_score_r==0 && n_res_in_col>1)res=0;
800 else if ( n_res_in_col==1)res=NO_COLOR_RESIDUE;
801 else res=((score_r*10)/max_score_r);
804 (OUT)->seq_al[b][a]=(res==NO_COLOR_RESIDUE)?res:(MIN(res, 9));
805 max_score_col+=max_score_r;
807 max_score_seq[b]+=max_score_r;
808 score_seq[b]+=score_r;
809 max_score_aln+=max_score_r;
812 if ( max_score_col==0 && n_res_in_col>1)res=0;
813 else if ( n_res_in_col<2)res=NO_COLOR_RESIDUE;
814 else res=((score_col*10)/max_score_col);
816 OUT->seq_al[IN->nseq][a]=(res==NO_COLOR_RESIDUE)?res:(MIN(res,9));
819 IN->score_aln=OUT->score_aln=(max_score_aln==0)?0:((score_aln*100)/max_score_aln);
820 for ( a=0; a< OUT->nseq; a++)
822 OUT->score_seq[a]=(max_score_seq[a]==0)?0:((score_seq[a]*100)/max_score_seq[a]);
827 vfree ( max_score_seq);
828 free_arrayN((void*)res_extended_weight, 3);
837 Alignment * heuristic_coffee_evaluate_output ( Alignment *IN,Constraint_list *CL)
839 int a,b, c,res, s, s1, s2, r1, r2;
842 int max_score_r, score_r;
843 double score_col=0, score_aln=0;
844 int max_score_col, max_score_aln;
845 double *max_score_seq, *score_seq;
846 int **tot_extended_weight;
847 int **res_extended_weight;
851 Residue x: sum of observed extended X.. /sum of possible X..
854 if ( !CL->evaluate_residue_pair){fprintf ( stderr, "\nWARNING: CL->evaluate_residue_pair Not set\nSet to: extend_residue_pair\n");CL->evaluate_residue_pair= extend_residue_pair; }
856 OUT=copy_aln (IN, OUT);
857 pos=aln2pos_simple(IN, IN->nseq);
860 max_score_seq=vcalloc ( IN->nseq, sizeof (double));
861 score_seq=vcalloc ( IN->nseq, sizeof (double));
863 tot_extended_weight=list2residue_partial_extended_weight(CL);
864 res_extended_weight=declare_int ((CL->S)->nseq, (CL->S)->max_len+1);
866 for (a=0; a< IN->len_aln; a++)
868 for ( b=0; b< IN->nseq-1; b++)
872 for ( c=b+1; c< IN->nseq; c++)
876 if ( s1==s2 && !CL->do_self)continue;
877 else if ( r1<=0 || r2<=0) continue;
880 if ( s1< s2)s=(CL->evaluate_residue_pair)( CL, s1, r1, s2, r2);
881 else s=(CL->evaluate_residue_pair)( CL, s2, r2, s1, r1);
882 res_extended_weight[s1][r1]+=s;
883 res_extended_weight[s2][r2]+=s;
890 sprintf ( OUT->name[IN->nseq], "cons");
891 for ( max_score_aln=0,score_aln=0,a=0; a< IN->len_aln; a++)
893 OUT->seq_al[IN->nseq][a]=NO_COLOR_RESIDUE;
894 for ( n_res_in_col=0,b=0; b<IN->nseq; b++){n_res_in_col+=(pos[b][a]>0)?1:0;}
895 for ( max_score_col=0, score_col=0,b=0; b< IN->nseq; b++)
897 OUT->seq_al[b][a]=NO_COLOR_RESIDUE;
903 max_score_r =tot_extended_weight[s1][r1];
904 score_r=res_extended_weight[s1][r1];
905 res=(max_score_r==0 ||n_res_in_col<2 )?NO_COLOR_RESIDUE:((score_r*10)/max_score_r);
906 (OUT)->seq_al[b][a]=(res==NO_COLOR_RESIDUE)?res:(MIN(res, 9));
907 max_score_col+=max_score_r;
909 max_score_seq[b]+=max_score_r;
910 score_seq[b]+=score_r;
911 max_score_aln+=max_score_r;
914 res=(max_score_col==0 || n_res_in_col<2)?NO_COLOR_RESIDUE:((score_col*10)/max_score_col);
915 OUT->seq_al[IN->nseq][a]=(res==NO_COLOR_RESIDUE)?res:(MIN(res,9));
918 IN->score_aln=OUT->score_aln=MIN(100,((max_score_aln==0)?0:((score_aln*100)/max_score_aln)));
919 for ( a=0; a< OUT->nseq; a++)
921 OUT->score_seq[a]=MIN(100,((max_score_seq[a]==0)?0:((score_seq[a]*100)/max_score_seq[a])));
926 vfree ( max_score_seq);
928 free_int (tot_extended_weight, -1);
929 free_int (res_extended_weight, -1);
934 Alignment * non_extended_t_coffee_evaluate_output ( Alignment *IN,Constraint_list *CL)
936 int a,b, c,res, s1, s2, r1, r2;
939 int max_score_r, score_r;
940 double score_col=0, score_aln=0;
941 int max_score_col, max_score_aln;
942 double *max_score_seq, *score_seq;
944 int **tot_non_extended_weight;
945 int **res_non_extended_weight;
947 CLIST_TYPE *entry=NULL;
951 entry=vcalloc (CL->entry_len, CL->el_size);
952 if ( !CL->evaluate_residue_pair){fprintf ( stderr, "\nWARNING: CL->evaluate_residue_pair Not set\nSet to: extend_residue_pair\n");CL->evaluate_residue_pair= extend_residue_pair; }
954 OUT=copy_aln (IN, OUT);
955 pos=aln2pos_simple(IN, IN->nseq);
958 max_score_seq=vcalloc ( IN->nseq, sizeof (double));
959 score_seq=vcalloc ( IN->nseq, sizeof (double));
961 tot_non_extended_weight=list2residue_total_weight(CL);
962 res_non_extended_weight=declare_int ((CL->S)->nseq, (CL->S)->max_len+1);
964 for (a=0; a< IN->len_aln; a++)
966 for ( b=0; b< IN->nseq-1; b++)
970 for ( c=b+1; c< IN->nseq; c++)
974 if ( s1==s2 && !CL->do_self)continue;
975 else if ( r1<=0 || r2<=0) continue;
982 if ((l=main_search_in_list_constraint (entry,&p,4,CL))!=NULL)
984 res_non_extended_weight[s1][r1]+=l[WE];
985 res_non_extended_weight[s2][r2]+=l[WE];
991 if ((l=main_search_in_list_constraint (entry,&p,4,CL))!=NULL)
993 res_non_extended_weight[s1][r1]+=l[WE];
994 res_non_extended_weight[s2][r2]+=l[WE];
996 max_score=MAX(max_score,res_non_extended_weight[s1][r1]);
997 max_score=MAX(max_score,res_non_extended_weight[s2][r2]);
1004 sprintf ( OUT->name[IN->nseq], "cons");
1005 for ( max_score_aln=0,score_aln=0,a=0; a< IN->len_aln; a++)
1007 OUT->seq_al[IN->nseq][a]=NO_COLOR_RESIDUE;
1008 for ( local_nseq=0,b=0; b<IN->nseq; b++){local_nseq+=(pos[b][a]>0)?1:0;}
1010 for ( max_score_col=0, score_col=0,b=0; b< IN->nseq; b++)
1012 OUT->seq_al[b][a]=NO_COLOR_RESIDUE;
1018 max_score_r =max_score;/*tot_non_extended_weight[s1][r1];*/
1019 score_r=res_non_extended_weight[s1][r1];
1020 res=(max_score_r==0 || local_nseq<2 )?NO_COLOR_RESIDUE:((score_r*10)/max_score_r);
1022 (OUT)->seq_al[b][a]=(res==NO_COLOR_RESIDUE)?res:(MIN(res, 9));
1023 max_score_col+=max_score_r;
1025 max_score_seq[b]+=max_score_r;
1026 score_seq[b]+=score_r;
1027 max_score_aln+=max_score_r;
1030 res=(max_score_col==0 || local_nseq<2)?NO_COLOR_RESIDUE:((score_col*10)/max_score_col);
1031 OUT->seq_al[IN->nseq][a]=(res==NO_COLOR_RESIDUE)?res:(MIN(res,9));
1034 IN->score_aln=OUT->score_aln=(max_score_aln==0)?0:((score_aln*100)/max_score_aln);
1035 for ( a=0; a< OUT->nseq; a++)
1037 OUT->score_seq[a]=(max_score_seq[a]==0)?0:((score_seq[a]*100)/max_score_seq[a]);
1038 OUT->score_seq[a]=(OUT->score_seq[a]>100)?100:OUT->score_seq[a];
1040 OUT->score_aln=(OUT->score_aln>100)?100:OUT->score_aln;
1043 vfree ( max_score_seq);
1045 free_int (tot_non_extended_weight, -1);
1046 free_int (res_non_extended_weight, -1);
1054 /*********************************************************************************************/
1056 /* PROFILE/PRofile Functions */
1058 /*********************************************************************************************/
1059 int channel_profile_profile (int *prf1, int *prf2, Constraint_list *CL);
1061 Profile_cost_func get_profile_mode_function (char *name, Profile_cost_func func)
1065 static Profile_cost_func *flist;
1066 static char **nlist;
1070 /*The first time: initialize the list of pairwse functions*/
1071 /*If func==NULL:REturns a pointer to the function associated with a name*/
1072 /*If name is empty:Prints the name of the function associated with name*/
1076 flist=vcalloc ( 100, sizeof (Pwfunc));
1077 nlist=declare_char (100, 100);
1079 flist[nfunc]=cw_profile_profile;
1080 sprintf (nlist[nfunc], "cw_profile_profile");
1083 flist[nfunc]=muscle_profile_profile;
1084 sprintf (nlist[nfunc], "muscle_profile_profile");
1087 flist[nfunc]=channel_profile_profile;
1088 sprintf (nlist[nfunc], "channel_profile_profile");
1092 for ( a=0; a<nfunc; a++)
1094 if ( (name && strm (nlist[a],name)) || flist[a]==func)
1096 if (name)sprintf (name,"%s", nlist[a]);
1100 fprintf ( stderr, "\n[%s] is an unknown profile_profile function[FATAL:%s]\n",name, PROGRAM);
1105 int generic_evaluate_profile_score (Constraint_list *CL,Alignment *Profile1,int s1, int r1, Alignment *Profile2,int s2, int r2, Profile_cost_func prf_prf)
1112 /*Generic profile function*/
1115 dummy=vcalloc (10, sizeof(int));
1116 dummy[0]=1;/*Number of Amino acid types on colum*/
1117 dummy[1]=5;/*Length of Dummy*/
1118 dummy[3]='\0';/*Amino acid*/
1119 dummy[4]=1; /*Number of occurences*/
1120 dummy[5]=100; /*Frequency in the MSA column*/
1129 prf1=(Profile1)?(Profile1->P)->count2[r1]:NULL;
1130 prf2=(Profile2)?(Profile2->P)->count2[r2]:NULL;
1132 if (!prf1) {prf1=dummy; prf1[3]=(CL->S)->seq[s1][r1];}
1133 else if (!prf2){prf2=dummy; prf2[3]=(CL->S)->seq[s2][r2];}
1135 score=((prf_prf==NULL)?cw_profile_profile:prf_prf) (prf1, prf2, CL);
1142 int cw_profile_profile_count (int *prf1, int *prf2, Constraint_list *CL)
1144 /*see function aln2count2 for prf structure*/
1150 for ( n=0,a=3; a<prf1[1]; a+=3)
1151 for ( b=3; b<prf2[1]; b+=3)
1157 score+=prf1[a+1]*prf2[b+1]*CL->M[res1-'A'][res2-'A'];
1158 n+=prf1[a+1]*prf2[b+1];
1162 score=(score*SCORE_K)/n;
1165 int muscle_profile_profile (int *prf1, int *prf2, Constraint_list *CL)
1167 /*see function aln2count2 for prf structure*/
1170 double score=0, fg1, fg2, fi, fj, m;
1171 static double *exp_lu;
1174 exp_lu=vcalloc ( 10000, sizeof (double));
1176 for ( a=-1000; a<1000; a++)
1177 exp_lu[a]=exp((double)a);
1182 for (a=3; a<prf1[1]; a+=3)
1185 fi=(double)prf1[a+2]/100;
1187 for ( b=3; b<prf2[1]; b+=3)
1190 fj=(double)prf2[b+2]/100;
1191 /*m=exp((double)CL->M[res1-'A'][res2-'A']);*/
1192 m=exp_lu[CL->M[res1-'A'][res2-'A']];
1197 fg1=(double)prf1[2]/100;
1198 fg2=(double)prf2[2]/100;
1199 score=(score==0)?0:log(score)*(1-fg1)*(1-fg2);
1200 score=(score*SCORE_K);
1201 /*if ( score<-100)fprintf ( stderr, "\nSCORE %d %d", (int)score, cw_profile_profile(prf1, prf2, CL));*/
1205 int cw_profile_profile (int *prf1, int *prf2, Constraint_list *CL)
1207 /*see function aln2count2 for prf structure*/
1213 for ( n=0,a=3; a<prf1[1]; a+=3)
1214 for ( b=3; b<prf2[1]; b+=3)
1219 p=prf1[a+1]*prf2[b+1];
1222 score+=p*CL->M[res1-'A'][res2-'A'];
1224 score=(score*SCORE_K)/((double)(n==0)?1:n);
1227 int cw_profile_profile_old (int *prf1, int *prf2, Constraint_list *CL)
1229 /*see function aln2count2 for prf structure*/
1236 for ( n=0,a=3; a<prf1[1]; a+=3)
1237 for ( b=3; b<prf2[1]; b+=3)
1242 p=prf1[a+1]*prf2[b+1];
1245 score+=p*CL->M[res1-'A'][res2-'A'];
1247 score=(score*SCORE_K)/((double)(n==0)?1:n);
1250 int channel_profile_profile ( int *prf1, int *prf2, Constraint_list *CL)
1259 if (prf1[0]!=prf1[0]){fprintf ( stderr, "\nERROR: Inconsistent number of channels [channel_profile_profile::FATAL%s]", PROGRAM);}
1263 for (a=1, n=0; a<=prf1[0]; a++)
1265 if (prf1[a]>0 && prf2[a]>0)
1267 n++;score+=CL->M[prf1[a]-'A'][prf2[a]-'A'];
1274 score=(n==0)?0:(score*SCORE_K)/n;
1280 /*********************************************************************************************/
1282 /* FUNCTIONS FOR GETING THE COST : (Sequences) ->evaluate_residue_pair */
1284 /*********************************************************************************************/
1285 int evaluate_blast_profile_score (Constraint_list *CL, int s1, int r1, int s2, int r2)
1291 PRF1=(Alignment*)atop(seq2T_value (CL->S, s1, "A", "_RB_"));
1292 PRF2=(Alignment*)atop(seq2T_value (CL->S, s2, "A", "_RB_"));
1294 return generic_evaluate_profile_score (CL,PRF1,s1, r1, PRF2,s2, r2, CL->profile_mode);
1297 int evaluate_aln_profile_score (Constraint_list *CL, int s1, int r1, int s2, int r2)
1300 return generic_evaluate_profile_score (CL,seq2R_template_profile((CL->S),s1),s1, r1, seq2R_template_profile(CL->S,s2),s2, r2, CL->profile_mode);
1304 int evaluate_profile_score (Constraint_list *CL,Alignment *Prf1, int s1, int r1, Alignment *Prf2,int s2, int r2)
1307 return generic_evaluate_profile_score (CL, Prf1, s1,r1,Prf2, s2,r2,CL->profile_mode);
1310 int evaluate_cdna_matrix_score (Constraint_list *CL, int s1, int r1, int s2, int r2)
1319 a1=translate_dna_codon((CL->S)->seq[s1]+r1,'x');
1320 a2=translate_dna_codon((CL->S)->seq[s2]+r2,'x');
1324 if (a1=='x' || a2=='x')return 0;
1325 else return CL->M[a1-'A'][a2-'A']*SCORE_K;
1332 int evaluate_physico_score ( Constraint_list *CL, int s1, int r1, int s2, int r2)
1336 static float **prop_table;
1339 if (r1<0 || r2<0)return 0;
1342 prop_table= initialise_aa_physico_chemical_property_table(&n_prop);
1343 for (p=0; p< n_prop; p++)max+=100;
1346 a=tolower (( CL->S)->seq[s1][r1]);
1347 b=tolower (( CL->S)->seq[s2][r2]);
1349 for (tot=0,p=0; p< n_prop; p++)
1351 tot+=(double)(prop_table[p][a]-prop_table[p][b])*(prop_table[p][a]-prop_table[p][b]);
1354 tot=(sqrt(tot)/max)*10;
1356 return (int) tot*SCORE_K;
1361 int evaluate_diaa_matrix_score ( Constraint_list *CL, int s1, int r1, int s2, int r2)
1375 m=declare_arrayN(4, sizeof (int), 26, 26, 26, 26);
1376 fp=vfopen ("diaa_mat.mat", "r");
1377 while ((c=fgetc (fp))!=EOF)
1381 buf=vfgets(buf, fp);
1386 sscanf (buf, "%s %s %d %d", k1, k2, &v1, &v2);
1388 m[k1[0]-'a'][k1[1]-'a'][k2[0]-'a'][k2[1]-'a']=v1;
1389 m[k2[0]-'a'][k2[1]-'a'][k1[0]-'a'][k1[1]-'a']=v1;
1393 alp=vcalloc (256, sizeof (int));
1394 for (a=0; a<26; a++)alp[a+'a']=1;
1407 char aa1, aa2, aa3, aa4, u;
1414 aa1=tolower((CL->S)->seq[s1][r1-1]);
1415 aa2=tolower((CL->S)->seq[s1][r1]);
1416 aa3=tolower((CL->S)->seq[s2][r2-1]);
1417 aa4=tolower((CL->S)->seq[s2][r2]);
1418 u=alp[(int)aa1];u+=alp[(int)aa2];u+=alp[(int)aa3];u+=alp[(int)aa4];
1421 s+=m[aa1-'a'][aa2-'a'][aa3-'a'][aa4-'a'];
1426 aa1=tolower((CL->S)->seq[s1][r1]);
1427 aa2=tolower((CL->S)->seq[s1][r1+1]);
1428 aa3=tolower((CL->S)->seq[s2][r2]);
1429 aa4=tolower((CL->S)->seq[s2][r2+1]);
1430 u=alp[(int)aa1];u+=alp[(int)aa2];u+=alp[(int)aa3];u+=alp[(int)aa4];
1433 s+=m[aa1-'a'][aa2-'a'][aa3-'a'][aa4-'a'];
1436 if (n)return (s*SCORE_K)/n;
1440 int evaluate_monoaa_matrix_score ( Constraint_list *CL, int s1, int r1, int s2, int r2)
1454 m=declare_arrayN(2, sizeof (int), 26, 26);
1455 fp=vfopen ("monoaa_mat.mat", "r");
1456 while ((c=fgetc (fp))!=EOF)
1460 buf=vfgets(buf, fp);
1465 sscanf (buf, "%s %s %d %d", k1, k2, &v1, &v2);
1467 m[k1[0]-'a'][k2[0]-'a']=v1;
1468 m[k2[0]-'a'][k1[0]-'a']=v1;
1472 alp=vcalloc (256, sizeof (int));
1473 for (a=0; a<26; a++)alp[a+'a']=1;
1493 aa1=tolower((CL->S)->seq[s1][r1]);
1494 aa3=tolower((CL->S)->seq[s2][r2]);
1495 u=alp[(int)aa1];u+=alp[(int)aa3];
1498 s+=m[aa1-'a'][aa3-'a'];
1503 if (n)return (s*SCORE_K)/n;
1507 int evaluate_matrix_score ( Constraint_list *CL, int s1, int r1, int s2, int r2)
1510 if ( seq2R_template_profile (CL->S,s1) ||seq2R_template_profile (CL->S,s2))
1512 return evaluate_aln_profile_score ( CL, s1,r1, s2, r2);
1520 return CL->M[(CL->S)->seq[s1][r1]-'A'][(CL->S)->seq[s2][r2]-'A']*SCORE_K;
1525 int *get_curvature ( int s1, Constraint_list *CL);
1526 int evaluate_curvature_score( Constraint_list *CL, int s1, int r1, int s2, int r2)
1533 if (!st) st= vcalloc ((CL->S)->nseq, sizeof (char*));
1536 st[s1]=get_curvature (s1, CL);
1540 st[s2]=get_curvature (s2, CL);
1556 //HERE ("%d", score);
1557 //if (score<0)HERE ("%d", score);
1566 int *get_curvature ( int s1, Constraint_list *CL)
1571 char name [1000], b1[100], b2[100];
1573 sprintf ( name, "%s.curvature", (CL->S)->name[s1]);
1574 array=vcalloc (strlen ((CL->S)->seq[s1]), sizeof (int));
1575 fp=vfopen ( name, "r");
1576 while ( fscanf (fp, "%s %d %c %f\n",b1, &a, &c,&f )==4)
1578 if ( c!=(CL->S)->seq[s1][n]){HERE ("ERROR: %c %c", c,(CL->S)->seq[s1][n] );exit (0);}
1579 else array[n++]=(int)(float)100*(float)f;
1584 int evaluate_tm_matrix_score ( Constraint_list *CL, int s1, int r1, int s2, int r2)
1592 tmat=read_matrice ("blosum62mt");
1595 if (!st) st= vcalloc ((CL->S)->nseq, sizeof (char*));
1596 if (!st[s1])st[s1]=seq2T_template_string((CL->S),s1);
1597 if (!st[s2])st[s2]=seq2T_template_string((CL->S),s2);
1609 if (st[s1])p1=tolower (st[s1][r1]);
1610 if (st[s2])p2=tolower (st[s2][r2]);
1612 if ( p1=='h' && p2=='h')return tmat[(CL->S)->seq[s1][r1]-'A'][(CL->S)->seq[s2][r2]-'A']*SCORE_K;
1613 //else if (p1=='h' || p2=='h' ) return -100*SCORE_K;
1614 else return CL->M[(CL->S)->seq[s1][r1]-'A'][(CL->S)->seq[s2][r2]-'A']*SCORE_K;
1622 int evaluate_ssp_matrix_score ( Constraint_list *CL, int s1, int r1, int s2, int r2)
1625 static int **alpha, **beta, **coil;
1630 beta=read_matrice ("beta_mat");
1631 alpha=read_matrice ("alpha_mat");
1632 coil=read_matrice ("coil_mat");
1635 if (!st) st= vcalloc ((CL->S)->nseq, sizeof (char*));
1636 if (!st[s1])st[s1]=seq2E_template_string((CL->S),s1);
1637 if (!st[s2])st[s2]=seq2E_template_string((CL->S),s2);
1640 if ( !st[s1])HERE ("1******");
1641 if ( !st[s2])HERE ("2******");
1653 if (st[s1])p1=tolower (st[s1][r1]);
1654 if (st[s2])p2=tolower (st[s2][r2]);
1656 if (p1!=-1 && p1==p2)F=1.3;
1660 score= CL->M[(CL->S)->seq[s1][r1]-'A'][(CL->S)->seq[s2][r2]-'A']*F*SCORE_K;
1673 int evaluate_combined_matrix_score ( Constraint_list *CL, int s1, int r1, int s2, int r2)
1676 function documentation: start
1678 int evaluate_matrix_score ( Constraint_list *CL, int s1, int s2, int r1, int r2)
1680 this function evaluates the score for matching residue S1(r1) wit residue S2(r2)
1683 function documentation: end
1686 if ( seq2R_template_profile (CL->S,s1) ||seq2R_template_profile (CL->S,s2))
1688 return evaluate_aln_profile_score ( CL, s1,r1, s2, r2);
1695 if (r1==0 || r2==0)return CL->M[(CL->S)->seq[s1][r1]-'A'][(CL->S)->seq[s2][r2]-'A']*SCORE_K;
1698 int A1, A2, B1, B2, a2, b2;
1701 A1=toupper((CL->S)->seq[s1][r1-1]);
1702 A2=toupper((CL->S)->seq[s1][r1]);
1703 B1=toupper((CL->S)->seq[s2][r2-1]);
1704 B2=toupper((CL->S)->seq[s2][r2]);
1708 A1-='A';A2-='A';B1-='A'; B2-='A';a2-='A';b2-='A';
1710 score=CL->M[a2][b2]-FABS((CL->M[A1][A2])-(CL->M[B1][B2]));
1719 int residue_pair_non_extended_list ( Constraint_list *CL, int s1, int r1, int s2, int r2 )
1723 This is the generic Function->works with everything
1725 int residue_pair_non_extended_list ( Constraint_list *CL, int s1, int r1, int s2, int r2, int field );
1727 Computes the non extended score for aligning residue seq1(r1) Vs seq2(r2)
1729 This function can compare a sequence with itself.
1731 Associated functions: See util constraint list, list extention functions.
1732 function documentation: end
1742 field = CL->weight_field;
1745 if ( r1<=0 || r2<=0)return 0;
1746 else if ( !CL->extend_jit)
1748 if ( !entry) entry=vcalloc (LIST_N_FIELDS , sizeof (int));
1753 if ( r1==r2 && s1==s2) return UNDEFINED;
1754 r=main_search_in_list_constraint( entry,&p,4,CL);
1755 if (r==NULL)return 0;
1756 else return r[field]*SCORE_K;
1759 return UNDEFINED;/*ERROR*/
1766 int residue_pair_extended_list_mixt (Constraint_list *CL, int s1, int r1, int s2, int r2 )
1770 score+= residue_pair_extended_list_quadruplet(CL, s1, r1, s2, r2);
1771 score+= residue_pair_extended_list (CL, s1, r1, s2, r2);
1773 return score*SCORE_K;
1776 int residue_pair_extended_list_quadruplet (Constraint_list *CL, int s1, int r1, int s2, int r2 )
1780 int t_s, t_r, t_w, q_s, q_r, q_w;
1785 /* This measure the quadruplets cost on a pair of residues*/
1789 field=CL->weight_field;
1791 if ( r1<=0 || r2<=0)return 0;
1794 hasch=vcalloc ( (CL->S)->nseq, sizeof (int*));
1795 for ( a=0; a< (CL->S)->nseq; a++)hasch[a]=vcalloc ( (CL->S)->len[a]+1, sizeof (int));
1798 CL=index_res_constraint_list ( CL, field);
1799 hasch[s1][r1]=100000;
1800 for (a=1; a< CL->residue_index[s1][r1][0]; a+=3)
1802 t_s=CL->residue_index[s1][r1][a];
1803 t_r=CL->residue_index[s1][r1][a+1];
1804 t_w=CL->residue_index[s1][r1][a+2];
1805 if ( CL->seq_for_quadruplet[t_s])
1807 for ( b=1; b<CL->residue_index[t_s][t_r][0]; b+=3)
1809 q_s=CL->residue_index[t_s][t_r][b];
1810 q_r=CL->residue_index[t_s][t_r][b+1];
1811 q_w=CL->residue_index[t_s][t_r][b+2];
1812 if (CL-> seq_for_quadruplet[q_s])
1813 hasch[q_s][q_r]=MIN(q_w,t_w);
1820 for (a=1; a< CL->residue_index[s2][r2][0]; a+=3)
1822 t_s=CL->residue_index[s2][r2][a];
1823 t_r=CL->residue_index[s2][r2][a+1];
1824 t_w=CL->residue_index[s2][r2][a+2];
1825 if ( CL->seq_for_quadruplet[t_s])
1827 for ( b=1; b<CL->residue_index[t_s][t_r][0]; b+=3)
1829 q_s=CL->residue_index[t_s][t_r][b];
1830 q_r=CL->residue_index[t_s][t_r][b+1];
1831 q_w=CL->residue_index[t_s][t_r][b+2];
1832 if (hasch[q_s][q_r] && CL->seq_for_quadruplet[q_s])
1833 score+=MIN(hasch[q_s][q_r],MIN(q_w,t_w));
1838 score=(CL->normalise)?((score*CL->normalise)/CL->max_ext_value):score;
1840 for (a=1; a< CL->residue_index[s1][r1][0]; a+=3)
1842 t_s=CL->residue_index[s1][r1][a];
1843 t_r=CL->residue_index[s1][r1][a+1];
1844 t_w=CL->residue_index[s1][r1][a+2];
1845 if ( CL->seq_for_quadruplet[t_s])
1847 for ( b=1; b<CL->residue_index[t_s][t_r][0]; b+=3)
1849 q_s=CL->residue_index[t_s][t_r][b];
1850 q_r=CL->residue_index[t_s][t_r][b+1];
1856 return (int)(score*SCORE_K);
1860 Constraint_list * R_extension ( Constraint_list *CL, Constraint_list *R);
1861 int residue_pair_extended_list4rna4 (Constraint_list *CL,int s1, int r1, int s2, int r2 )
1867 sprintf ( CL->rna_lib, "%s", seq2rna_lib (CL->S, NULL));
1870 return residue_pair_extended_list4rna2 (CL, s1, r1, s2,r2);
1872 int residue_pair_extended_list4rna1 (Constraint_list *CL, int s1, int r1, int s2, int r2 )
1874 static Constraint_list *R;
1875 if (!R)R=read_rna_lib (CL->S, CL->rna_lib);
1876 return residue_pair_extended_list4rna (CL, R, s1, r1, s2, r2);
1879 int residue_pair_extended_list4rna3 (Constraint_list *CL,int s1, int r1, int s2, int r2 )
1881 static Constraint_list *R;
1884 R=read_rna_lib (CL->S, CL->rna_lib);
1885 rna_lib_extension (CL,R);
1887 return residue_pair_extended_list (CL, s1,r1, s2,r2);
1890 int residue_pair_extended_list4rna2 (Constraint_list *CL,int s1, int r1, int s2, int r2 )
1892 static Constraint_list *R;
1898 R=read_rna_lib (CL->S, CL->rna_lib);
1899 rna_lib_extension (CL,R);
1903 return residue_pair_extended_list4rna (CL, R, s1, r1, s2, r2);
1905 int residue_pair_extended_list4rna ( Constraint_list *CL,Constraint_list *R, int s1, int r1, int s2, int r2 )
1911 int score=0, score2;
1915 if ( r1<0 || r2<0)return 0;
1919 for (a=1; a<R->residue_index[s1][r1][0]; a+=3)
1921 list1[n1++]=R->residue_index[s1][r1][a+1];
1926 for (a=1; a<R->residue_index[s2][r2][0]; a+=3)
1928 list2[n2++]=R->residue_index[s2][r2][a+1];
1932 score=residue_pair_extended_list ( CL, s1,list1[0], s2,list2[0]);
1934 for (score2=0,a=1; a<n1; a++)
1935 for ( b=1; b<n2; b++)
1936 score2=MAX(residue_pair_extended_list ( CL, s1,list1[a], s2,list2[b]), score2);
1938 if ( n1==1 || n2==1);
1939 else score=MAX(score,score2);
1945 int residue_pair_extended_list4rna_ref ( Constraint_list *CL, int s1, int r1, int s2, int r2 )
1947 static Constraint_list *R;
1951 int score=0, score2;
1957 list=read_lib_list ( CL->rna_lib, &n);
1959 R=declare_constraint_list ( CL->S,NULL, NULL, 0,NULL, NULL);
1961 for (a=0; a< n; a++)
1963 R=read_constraint_list_file (R, list[a]);
1965 R=index_res_constraint_list (R, CL->weight_field);
1969 if ( r1<0 || r2<0)return 0;
1973 for (a=1; a<R->residue_index[s1][r1][0]; a+=3)
1975 list1[n1++]=R->residue_index[s1][r1][a+1];
1980 for (a=1; a<R->residue_index[s2][r2][0]; a+=3)
1982 list2[n2++]=R->residue_index[s2][r2][a+1];
1986 score=residue_pair_extended_list ( CL, s1,list1[0], s2,list2[0]);
1988 for (score2=0,a=1; a<n1; a++)
1989 for ( b=1; b<n2; b++)
1990 score2=MAX(residue_pair_extended_list ( CL, s1,list1[a], s2,list2[b]), score2);
1992 if ( n1==1 || n2==1);
1993 else score=MAX(score,score2);
1998 static int ** clean_residue_pair_hasch (int s1, int r1, int s2, int r2,int **hasch, Constraint_list *CL);
1999 int residue_pair_extended_list_raw ( Constraint_list *CL, int s1, int r1, int s2, int r2 )
2012 function documentation: start
2014 int residue_pair_extended_list ( Constraint_list *CL, int s1, int r1, int s2, int r2);
2016 Computes the extended score for aligning residue seq1(r1) Vs seq2(r2)
2017 Computes: matrix_score
2021 The extended score depends on the function index_res_constraint_list.
2022 This function can compare a sequence with itself.
2024 Associated functions: See util constraint list, list extention functions.
2026 function documentation: end
2029 field=CL->weight_field;
2031 if ( r1<=0 || r2<=0)return 0;
2032 if ( !hasch || max_len!=(CL->S)->max_len)
2034 max_len=(CL->S)->max_len;
2035 if ( hasch) free_int ( hasch, -1);
2036 hasch=declare_int ( (CL->S)->nseq, (CL->S)->max_len+1);
2039 CL=index_res_constraint_list ( CL, field);
2041 /* Check matches for R1 in the indexed lib*/
2042 hasch[s1][r1]=FORBIDEN;
2043 for (a=1; a< CL->residue_index[s1][r1][0]; a+=3)
2045 t_s=CL->residue_index[s1][r1][a];
2046 t_r=CL->residue_index[s1][r1][a+1];
2047 hasch[t_s][t_r]=CL->residue_index[s1][r1][a+2];
2050 /*Check Matches for r1 <-> r2 in the indexed lib */
2051 for (a=1; a< CL->residue_index[s2][r2][0]; a+=3)
2053 t_s=CL->residue_index[s2][r2][a];
2054 t_r=CL->residue_index[s2][r2][a+1];
2057 if (hasch[t_s][t_r])
2059 if (hasch[t_s][t_r]==FORBIDEN)
2061 score+=CL->residue_index[s2][r2][a+2];
2065 delta=MIN(hasch[t_s][t_r],CL->residue_index[s2][r2][a+2]);
2071 clean_residue_pair_hasch ( s1, r1,s2, r2, hasch, CL);
2074 int residue_pair_extended_list_pc ( Constraint_list *CL, int s1, int r1, int s2, int r2 )
2088 function documentation: start
2090 int residue_pair_extended_list ( Constraint_list *CL, int s1, int r1, int s2, int r2);
2092 Computes the extended score for aligning residue seq1(r1) Vs seq2(r2)
2093 Computes: matrix_score
2097 The extended score depends on the function index_res_constraint_list.
2098 This function can compare a sequence with itself.
2100 Associated functions: See util constraint list, list extention functions.
2102 function documentation: end
2105 field=CL->weight_field;
2107 if ( r1<=0 || r2<=0)return 0;
2108 if ( !hasch || max_len!=(CL->S)->max_len)
2110 max_len=(CL->S)->max_len;
2111 if ( hasch) free_int ( hasch, -1);
2112 hasch=declare_int ( (CL->S)->nseq, (CL->S)->max_len+1);
2115 CL=index_res_constraint_list ( CL, field);
2117 /* Check matches for R1 in the indexed lib*/
2118 hasch[s1][r1]=FORBIDEN;
2119 for (a=1; a< CL->residue_index[s1][r1][0]; a+=3)
2121 t_s=CL->residue_index[s1][r1][a];
2122 t_r=CL->residue_index[s1][r1][a+1];
2123 hasch[t_s][t_r]=CL->residue_index[s1][r1][a+2];
2126 /*Check Matches for r1 <-> r2 in the indexed lib */
2127 for (a=1; a< CL->residue_index[s2][r2][0]; a+=3)
2129 t_s=CL->residue_index[s2][r2][a];
2130 t_r=CL->residue_index[s2][r2][a+1];
2133 if (hasch[t_s][t_r])
2135 if (hasch[t_s][t_r]==FORBIDEN)
2137 score+=((float)CL->residue_index[s2][r2][a+2]/NORM_F);
2141 //delta=((float)hasch[t_s][t_r]/NORM_F)*((float)CL->residue_index[s2][r2][a+2]/NORM_F);
2142 delta=MIN((((float)hasch[t_s][t_r]/NORM_F)),(((float)CL->residue_index[s2][r2][a+2]/NORM_F)));
2148 clean_residue_pair_hasch ( s1, r1,s2, r2, hasch, CL);
2149 score/=(CL->S)->nseq;
2150 return score*NORM_F;
2154 int residue_pair_extended_list ( Constraint_list *CL, int s1, int r1, int s2, int r2 )
2166 new function: self normalized
2167 function documentation: start
2169 int residue_pair_extended_list ( Constraint_list *CL, int s1, int r1, int s2, int r2);
2171 Computes the extended score for aligning residue seq1(r1) Vs seq2(r2)
2172 Computes: matrix_score
2176 The extended score depends on the function index_res_constraint_list.
2177 This function can compare a sequence with itself.
2179 Associated functions: See util constraint list, list extention functions.
2181 function documentation: end
2184 field=CL->weight_field;
2186 if ( r1<=0 || r2<=0)return 0;
2187 if ( !hasch || max_len!=(CL->S)->max_len)
2189 max_len=(CL->S)->max_len;
2190 if ( hasch) free_int ( hasch, -1);
2191 hasch=declare_int ( (CL->S)->nseq, (CL->S)->max_len+1);
2194 CL=index_res_constraint_list ( CL, field);
2196 /* Check matches for R1 in the indexed lib*/
2197 hasch[s1][r1]=FORBIDEN;
2198 for (a=1; a< CL->residue_index[s1][r1][0]; a+=3)
2200 t_s=CL->residue_index[s1][r1][a];
2201 t_r=CL->residue_index[s1][r1][a+1];
2202 hasch[t_s][t_r]=CL->residue_index[s1][r1][a+2];
2203 max_score+=CL->residue_index[s1][r1][a+2];
2206 /*Check Matches for r1 <-> r2 in the indexed lib */
2207 for (a=1; a< CL->residue_index[s2][r2][0]; a+=3)
2209 t_s=CL->residue_index[s2][r2][a];
2210 t_r=CL->residue_index[s2][r2][a+1];
2213 if (hasch[t_s][t_r])
2215 if (hasch[t_s][t_r]==FORBIDEN)
2217 score+=CL->residue_index[s2][r2][a+2];
2218 max_score+=CL->residue_index[s2][r2][a+2];
2223 delta=MIN(hasch[t_s][t_r],CL->residue_index[s2][r2][a+2]);
2226 max_score-=hasch[t_s][t_r];
2228 max_val=MAX(max_val,delta);
2233 max_score+=CL->residue_index[s2][r2][a+2];
2237 max_score-=hasch[s2][r2];
2238 clean_residue_pair_hasch ( s1, r1,s2, r2, hasch, CL);
2241 if ( max_score==0)score=0;
2242 else if ( CL->normalise)
2244 score=((score*CL->normalise)/max_score)*SCORE_K;
2245 if (max_val> CL->normalise)
2247 score*=max_val/(double)CL->normalise;
2252 int ** clean_residue_pair_hasch (int s1, int r1, int s2, int r2,int **hasch, Constraint_list *CL)
2255 if ( !hasch) return hasch;
2257 for (a=1; a< CL->residue_index[s1][r1][0]; a+=3)
2259 t_s=CL->residue_index[s1][r1][a];
2260 t_r=CL->residue_index[s1][r1][a+1];
2263 hasch[s1][r1]=hasch[s2][r2]=0;
2266 int residue_pair_extended_list_old ( Constraint_list *CL, int s1, int r1, int s2, int r2 )
2276 function documentation: start
2278 int residue_pair_extended_list ( Constraint_list *CL, int s1, int r1, int s2, int r2);
2280 Computes the extended score for aligning residue seq1(r1) Vs seq2(r2)
2281 Computes: matrix_score
2285 The extended score depends on the function index_res_constraint_list.
2286 This function can compare a sequence with itself.
2288 Associated functions: See util constraint list, list extention functions.
2290 function documentation: end
2295 field=CL->weight_field;
2297 if ( r1<=0 || r2<=0)return 0;
2298 if ( !hasch || max_len!=(CL->S)->max_len)
2300 max_len=(CL->S)->max_len;
2301 if ( hasch) free_int ( hasch, -1);
2302 hasch=declare_int ( (CL->S)->nseq, (CL->S)->max_len+1);
2305 CL=index_res_constraint_list ( CL, field);
2307 hasch[s1][r1]=100000;
2308 for (a=1; a< CL->residue_index[s1][r1][0]; a+=3)
2310 t_s=CL->residue_index[s1][r1][a];
2311 t_r=CL->residue_index[s1][r1][a+1];
2312 hasch[t_s][t_r]=CL->residue_index[s1][r1][a+2];
2316 for (a=1; a< CL->residue_index[s2][r2][0]; a+=3)
2318 t_s=CL->residue_index[s2][r2][a];
2319 t_r=CL->residue_index[s2][r2][a+1];
2320 if (hasch[t_s][t_r])
2325 score+=MIN(hasch[t_s][t_r],CL->residue_index[s2][r2][a+2]);
2331 score=(CL->normalise)?((score*CL->normalise)/CL->max_ext_value):score;
2334 for (a=1; a< CL->residue_index[s1][r1][0]; a+=3)
2336 t_s=CL->residue_index[s1][r1][a];
2337 t_r=CL->residue_index[s1][r1][a+1];
2340 hasch[s1][r1]=hasch[s2][r2]=0;
2343 return (int)(score*SCORE_K);
2345 int residue_pair_test_function ( Constraint_list *CL, int s1, int r1, int s2, int r2 )
2357 function documentation: start
2359 int residue_pair_extended_list ( Constraint_list *CL, int s1, int r1, int s2, int r2);
2361 Computes the extended score for aligning residue seq1(r1) Vs seq2(r2)
2362 Computes: matrix_score
2366 The extended score depends on the function index_res_constraint_list.
2367 This function can compare a sequence with itself.
2369 Associated functions: See util constraint list, list extention functions.
2371 function documentation: end
2375 CL->weight_field=WE;
2376 field=CL->weight_field;
2379 if ( r1<=0 || r2<=0)return 0;
2380 if ( !hasch || max_len!=(CL->S)->max_len)
2382 max_len=(CL->S)->max_len;
2383 if ( hasch) free_int ( hasch, -1);
2384 hasch=declare_int ( (CL->S)->nseq, (CL->S)->max_len+1);
2387 CL=index_res_constraint_list ( CL, field);
2390 for (a=1; a< CL->residue_index[s1][r1][0]; a+=3)
2392 t_s=CL->residue_index[s1][r1][a];
2393 t_r=CL->residue_index[s1][r1][a+1];
2394 hasch[t_s][t_r]=CL->residue_index[s1][r1][a+2];
2398 for (a=1; a< CL->residue_index[s2][r2][0]; a+=3)
2400 t_s=CL->residue_index[s2][r2][a];
2401 t_r=CL->residue_index[s2][r2][a+1];
2402 if (hasch[t_s][t_r])
2404 cons1=hasch[t_s][t_r];
2405 cons2=CL->residue_index[s2][r2][a+2];
2406 score +=MIN(cons1,cons2);
2411 score=(CL->normalise)?((score*CL->normalise)/CL->max_ext_value):score;
2413 for (a=1; a< CL->residue_index[s1][r1][0]; a+=3)
2415 t_s=CL->residue_index[s1][r1][a];
2416 t_r=CL->residue_index[s1][r1][a+1];
2419 hasch[s1][r1]=hasch[s2][r2]=0;
2422 return (int)(score*SCORE_K);
2425 int residue_pair_relative_extended_list ( Constraint_list *CL, int s1, int r1, int s2, int r2 )
2434 function documentation: start
2436 int residue_pair_extended_list ( Constraint_list *CL, int s1, int r1, int s2, int r2);
2438 Computes the extended score for aligning residue seq1(r1) Vs seq2(r2)
2439 Computes: matrix_score
2443 The extended score depends on the function index_res_constraint_list.
2444 This function can compare a sequence with itself.
2446 Associated functions: See util constraint list, list extention functions.
2448 function documentation: end
2453 field=CL->weight_field;
2455 if ( r1<=0 || r2<=0)return 0;
2456 if ( !hasch || max_len!=(CL->S)->max_len)
2458 max_len=(CL->S)->max_len;
2459 if ( hasch) free_int ( hasch, -1);
2460 hasch=declare_int ( (CL->S)->nseq, (CL->S)->max_len+1);
2463 CL=index_res_constraint_list ( CL, field);
2465 hasch[s1][r1]=100000;
2466 for (a=1; a< CL->residue_index[s1][r1][0]; a+=3)
2468 t_s=CL->residue_index[s1][r1][a];
2469 t_r=CL->residue_index[s1][r1][a+1];
2470 hasch[t_s][t_r]=CL->residue_index[s1][r1][a+2];
2471 total_score+=CL->residue_index[s1][r1][a+2];
2475 for (a=1; a< CL->residue_index[s2][r2][0]; a+=3)
2477 t_s=CL->residue_index[s2][r2][a];
2478 t_r=CL->residue_index[s2][r2][a+1];
2479 total_score+=CL->residue_index[s1][r1][a+2];
2480 if (hasch[t_s][t_r])
2482 if (field==WE){score+=2*MIN(hasch[t_s][t_r],CL->residue_index[s2][r2][a+2]);}
2486 score=((CL->normalise*score)/total_score);
2489 for (a=1; a< CL->residue_index[s1][r1][0]; a+=3)
2491 t_s=CL->residue_index[s1][r1][a];
2492 t_r=CL->residue_index[s1][r1][a+1];
2495 hasch[s1][r1]=hasch[s2][r2]=0;
2497 return score*SCORE_K;
2499 int residue_pair_extended_list_g_coffee_quadruplet ( Constraint_list *CL, int s1, int r1, int s2, int r2 )
2501 int t_s, t_r, t_w, q_s, q_r, q_w;
2507 /* This measure the quadruplets cost on a pair of residues*/
2509 field=CL->weight_field;
2511 if ( r1<=0 || r2<=0)return 0;
2514 hasch=vcalloc ( (CL->S)->nseq, sizeof (int*));
2515 for ( a=0; a< (CL->S)->nseq; a++)hasch[a]=vcalloc ( (CL->S)->len[a]+1, sizeof (int));
2518 CL=index_res_constraint_list ( CL, field);
2519 hasch[s1][r1]=100000;
2520 for (a=1; a< CL->residue_index[s1][r1][0]; a+=3)
2522 t_s=CL->residue_index[s1][r1][a];
2523 t_r=CL->residue_index[s1][r1][a+1];
2524 t_w=CL->residue_index[s1][r1][a+2];
2525 if ( CL->seq_for_quadruplet[t_s])
2527 for ( b=1; b<CL->residue_index[t_s][t_r][0]; b+=3)
2529 q_s=CL->residue_index[t_s][t_r][b];
2530 q_r=CL->residue_index[t_s][t_r][b+1];
2531 if (CL-> seq_for_quadruplet[q_s])
2534 hasch[q_s][q_r]=MIN(CL->residue_index[t_s][t_r][b+2],t_w);
2541 for (s=0,score=0,a=1; a< CL->residue_index[s2][r2][0]; a+=3)
2543 t_s=CL->residue_index[s2][r2][a];
2544 t_r=CL->residue_index[s2][r2][a+1];
2545 t_w=CL->residue_index[s2][r2][a+2];
2546 if ( CL->seq_for_quadruplet[t_s])
2548 for ( b=1; b<CL->residue_index[t_s][t_r][0]; b+=3)
2550 q_s=CL->residue_index[t_s][t_r][b];
2551 q_r=CL->residue_index[t_s][t_r][b+1];
2552 q_w=CL->residue_index[t_s][t_r][b+2];
2553 if (hasch[q_s][q_r] && CL->seq_for_quadruplet[q_s])
2554 s=MIN(hasch[q_s][q_r],MIN(CL->residue_index[t_s][t_r][b+2],q_w));
2555 score=MAX(score, s);
2560 for (a=1; a< CL->residue_index[s1][r1][0]; a+=3)
2562 t_s=CL->residue_index[s1][r1][a];
2563 t_r=CL->residue_index[s1][r1][a+1];
2564 t_w=CL->residue_index[s1][r1][a+2];
2565 if ( CL->seq_for_quadruplet[t_s])
2567 for ( b=1; b<CL->residue_index[t_s][t_r][0]; b+=3)
2569 q_s=CL->residue_index[t_s][t_r][b];
2570 q_r=CL->residue_index[t_s][t_r][b+1];
2576 return score*SCORE_K;
2578 int residue_pair_extended_list_g_coffee ( Constraint_list *CL, int s1, int r1, int s2, int r2 )
2586 function documentation: start
2588 int residue_pair_extended_list ( Constraint_list *CL, int s1, int r1, int s2, int r2);
2590 Computes the extended score for aligning residue seq1(r1) Vs seq2(r2)
2591 Computes: matrix_score
2595 The extended score depends on the function index_res_constraint_list.
2596 This function can compare a sequence with itself.
2598 Associated functions: See util constraint list, list extention functions.
2600 function documentation: end
2603 field=CL->weight_field;
2605 if ( r1<=0 || r2<=0)return 0;
2608 hasch=vcalloc ( (CL->S)->nseq, sizeof (int*));
2609 for ( a=0; a< (CL->S)->nseq; a++)hasch[a]=vcalloc ( (CL->S)->len[a]+1, sizeof (int));
2612 CL=index_res_constraint_list ( CL, field);
2614 hasch[s1][r1]=100000;
2615 for (a=1; a< CL->residue_index[s1][r1][0]; a+=3)
2617 t_s=CL->residue_index[s1][r1][a];
2618 t_r=CL->residue_index[s1][r1][a+1];
2619 hasch[t_s][t_r]=CL->residue_index[s1][r1][a+2];
2623 for (s=0, score=0,a=1; a< CL->residue_index[s2][r2][0]; a+=3)
2625 t_s=CL->residue_index[s2][r2][a];
2626 t_r=CL->residue_index[s2][r2][a+1];
2628 if (hasch[t_s][t_r])
2631 {s=MIN(hasch[t_s][t_r],CL->residue_index[s2][r2][a+2]);
2637 for (a=1; a< CL->residue_index[s1][r1][0]; a+=3)
2639 t_s=CL->residue_index[s1][r1][a];
2640 t_r=CL->residue_index[s1][r1][a+1];
2643 hasch[s1][r1]=hasch[s2][r2]=0;
2645 return score*SCORE_K;
2648 int extend_residue_pair ( Constraint_list *CL, int s1, int r1, int s2, int r2)
2662 This is the generic Function->works with everything
2663 should be gradually phased out
2666 int extend_residue_pair ( Constraint_list *CL, int s1, int r1, int s2, int r2, int field )
2668 Computes the extended score for aligning residue seq1(r1) Vs seq2(r2)
2669 Computes: matrix_score
2673 The extended score depends on the function index_res_constraint_list.
2674 This function can compare a sequence with itself.
2676 Associated functions: See util constraint list, list extention functions.
2677 function documentation: end
2681 field=CL->weight_field;
2683 if ( r1<=0 || r2<=0)return 0;
2684 else if ( !CL->L && CL->M)
2686 return evaluate_matrix_score (CL, s1,r1, s2, r2);
2689 else if ( !CL->extend_jit)
2691 if ( !entry) entry=vcalloc (LIST_N_FIELDS , sizeof (int));
2696 r=main_search_in_list_constraint( entry,&p,4,CL);
2697 if (r==NULL)return 0;
2698 else return r[field];
2704 hasch=vcalloc ( (CL->S)->nseq, sizeof (int*));
2705 for ( a=0; a< (CL->S)->nseq; a++)hasch[a]=vcalloc ( (CL->S)->len[a]+1, sizeof (int));
2708 CL=index_res_constraint_list ( CL, field);
2710 hasch[s1][r1]=100000;
2711 for (a=1; a< CL->residue_index[s1][r1][0]; a+=3)
2713 t_s=CL->residue_index[s1][r1][a];
2714 t_r=CL->residue_index[s1][r1][a+1];
2715 hasch[t_s][t_r]=CL->residue_index[s1][r1][a+2];
2719 for (a=1; a< CL->residue_index[s2][r2][0]; a+=3)
2721 t_s=CL->residue_index[s2][r2][a];
2722 t_r=CL->residue_index[s2][r2][a+1];
2723 if (hasch[t_s][t_r])
2725 if (field==WE)score+=MIN(hasch[t_s][t_r],CL->residue_index[s2][r2][a+2] );
2729 score=(CL->normalise)?((score*CL->normalise)/CL->max_ext_value):score;
2730 for (a=1; a< CL->residue_index[s1][r1][0]; a+=3)
2732 t_s=CL->residue_index[s1][r1][a];
2733 t_r=CL->residue_index[s1][r1][a+1];
2736 hasch[s1][r1]=hasch[s2][r2]=0;
2738 return (int)(score*SCORE_K);
2741 /*********************************************************************************************/
2743 /* FUNCTIONS FOR GETTING THE PW COST : CL->get_dp_cost */
2745 /*********************************************************************************************/
2746 int get_dp_cost_blosum_matrix (Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL)
2749 static int **matrix;
2751 if (!matrix) matrix=read_matrice ("blosum62mt");
2752 s1=A->order[list1[0]][0];
2753 s2=A->order[list2[0]][0];
2754 r1=pos1[list1[0]][col1];
2755 r2=pos2[list2[0]][col2];
2757 /*dp cost function: works only with two sequences*/
2759 if ( seq2R_template_profile (CL->S,s1) ||seq2R_template_profile (CL->S,s2))
2760 return evaluate_aln_profile_score ( CL, s1,r1, s2, r2) -CL->nomatch*SCORE_K;
2761 else if (r1>0 && r2>0)
2767 return matrix [(CL->S)->seq[s1][r1]-'A'][(CL->S)->seq[s2][r2]-'A']*SCORE_K -CL->nomatch*SCORE_K;
2771 return -CL->nomatch*SCORE_K ;
2773 int get_dp_cost_pam_matrix (Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL)
2776 static int **matrix;
2778 if (!matrix) matrix=read_matrice ("pam250mt");
2779 s1=A->order[list1[0]][0];
2780 s2=A->order[list2[0]][0];
2781 r1=pos1[list1[0]][col1];
2782 r2=pos2[list2[0]][col2];
2784 /*dp cost function: works only with two sequences*/
2787 if ( seq2R_template_profile (CL->S,s1) ||seq2R_template_profile (CL->S,s2))
2788 return evaluate_aln_profile_score ( CL, s1,r1, s2, r2) -CL->nomatch*SCORE_K;
2789 else if (r1>0 && r2>0)
2795 return matrix [(CL->S)->seq[s1][r1]-'A'][(CL->S)->seq[s2][r2]-'A']*SCORE_K -CL->nomatch*SCORE_K;
2799 return -CL->nomatch*SCORE_K ;
2802 int get_dp_cost_pw_matrix (Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL)
2806 s1=A->order[list1[0]][0];
2807 s2=A->order[list2[0]][0];
2808 r1=pos1[list1[0]][col1];
2809 r2=pos2[list2[0]][col2];
2811 /*dp cost function: works only with two sequences*/
2812 if ( seq2R_template_profile (CL->S,s1) ||seq2R_template_profile (CL->S,s2))
2813 return evaluate_aln_profile_score ( CL, s1,r1, s2, r2) -CL->nomatch*SCORE_K;
2814 else if (r1>0 && r2>0)
2820 return CL->M[(CL->S)->seq[s1][r1]-'A'][(CL->S)->seq[s2][r2]-'A']*SCORE_K -CL->nomatch*SCORE_K;
2824 return -CL->nomatch*SCORE_K ;
2827 /*********************************************************************************************/
2829 /* FUNCTIONS FOR GETTING THE COST : CL->get_dp_cost */
2831 /*********************************************************************************************/
2835 int get_cdna_best_frame_dp_cost (Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL)
2842 static Alignment *B;
2845 if ( !score)score=declare_int(3, 2);
2855 if (ns1+ns2>2){fprintf ( stderr, "\nERROR: get_cdna_dp_cost mode is only for pair-wise ALN [FATAL]\n");crash("");}
2857 B=copy_aln (A, NULL);
2859 l1=(int)strlen ( A->seq_al[list1[0]]);
2860 for ( b=0; b<l1; b++)
2861 B->seq_al[list1[0]][b]=translate_dna_codon (A->seq_al[list1[0]]+b, 'x');
2862 l2=(int)strlen ( A->seq_al[list2[0]]);
2863 for ( b=0; b<l2; b++)
2864 B->seq_al[list2[0]][b]=translate_dna_codon (A->seq_al[list2[0]]+b, 'x');
2869 for ( a=0; a< 3; a++)score[a][0]=score[a][1]=0;
2870 for ( a=col1-(n*3),b=col2-(n*3); a<col1+(n*3) ; a++, b++)
2872 if ( a<0 || b<0 || a>=l1 || b>=l2)continue;
2874 a1=tolower(B->seq_al[list1[0]][a]);
2875 a2=tolower(B->seq_al[list2[0]][b]);
2877 score[a%3][0]+=(a1=='x' || a2=='x')?0:CL->M[a1-'A'][a2-'A'];
2881 for ( a=0; a< 3; a++)score[a][0]=(score[a][1]>0)?(score[a][0]/score[a][1]):0;
2882 if ( score[0][0]>score[1][0] && score[0][0]>score[2][0])
2884 else if ( score[1][0]>score[0][0] && score[1][0]>score[2][0])
2892 int get_dp_cost_quadruplet ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL)
2897 if ( ns1==1 || ns2==1)
2898 score=slow_get_dp_cost ( A, pos1, ns1, list1,col1, pos2, ns2, list2, col2, CL);
2900 score=fast_get_dp_cost_quadruplet ( A, pos1, ns1, list1,col1, pos2, ns2, list2, col2, CL);
2902 return (score==UNDEFINED)?UNDEFINED:(score-SCORE_K*CL->nomatch);
2904 int get_dp_cost ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL)
2911 if (A==NULL)return 0;
2913 if (MODE!=2 || MODE==0 || (!CL->L && CL->M) || (!CL->L && CL->T)|| ns1==1 || ns2==1)
2914 score=slow_get_dp_cost ( A, pos1, ns1, list1,col1, pos2, ns2, list2, col2, CL);
2915 else if (MODE==1 || MODE==2)
2916 score=fast_get_dp_cost ( A, pos1, ns1, list1,col1, pos2, ns2, list2, col2, CL);
2922 return (score==UNDEFINED)?UNDEFINED:(score-SCORE_K*CL->nomatch);
2924 int ***make_cw_lu (int **cons, int l, Constraint_list *CL);
2925 int ***make_cw_lu (int **cons, int l, Constraint_list *CL)
2930 lu=declare_arrayN(3, sizeof (int),l,26, 2);
2931 for ( p=0; p<l ; p++)
2933 for (r=0; r<26; r++)
2935 for (a=3; a<cons[p][1]; a+=3)
2937 lu[p][r][0]+=cons[p][a+1]*CL->M[r][cons[p][a]-'A'];
2938 lu[p][r][1]+=cons[p][a+1];
2945 int cw_profile_get_dp_cost ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL)
2947 static int last_tag;
2948 static int *pr, ***lu;
2950 static int *list[2], ns[2], **cons[2], ref;
2951 int eva_col,ref_col, a, p, r;
2957 if (last_tag!=A->random_tag)
2961 last_tag=A->random_tag;
2962 list[0]=list1;ns[0]=ns1;
2963 list[1]=list2;ns[1]=ns2;
2964 free_int (cons[0],-1);free_int (cons[1],-1);free_arrayN((void*)lu,3);
2965 cons[0]=NULL;cons[1]=NULL;lu=NULL;
2967 n1=sub_aln2nseq_prf (A, ns[0], list[0]);
2968 n2=sub_aln2nseq_prf (A, ns[1], list[1]);
2971 cons[0]=sub_aln2count_mat2 (A, ns[0], list[0]);
2972 cons[1]=sub_aln2count_mat2 (A, ns[1], list[1]);
2973 ref=(ns[0]>ns[1])?0:1;
2974 lu=make_cw_lu(cons[ref],(int)strlen(A->seq_al[list[ref][0]]), CL);
2981 r1=A->seq_al[list1[0]][col1];
2982 r2=A->seq_al[list2[0]][col2];
2983 if ( r1!='-' && r2!='-')
2984 return CL->M[r1-'A'][r2-'A']*SCORE_K -SCORE_K*CL->nomatch;
2986 return -SCORE_K*CL->nomatch;
2990 eva_col= (ref==0)?col2:col1;
2991 ref_col= (ref==0)?col1:col2;
2992 pr=cons[1-ref][eva_col];
2994 for (a=3; a< pr[1]; a+=3)
2999 t1+=lu[ref_col][r-'a'][0]*p;
3000 t2+=lu[ref_col][r-'a'][1]*p;
3002 score=(t2==0)?0:(t1*SCORE_K)/t2;
3003 score -=SCORE_K*CL->nomatch;
3007 int cw_profile_get_dp_cost_old ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL)
3009 static int last_tag;
3010 static int **cons1, **cons2;
3014 if (last_tag!=A->random_tag)
3016 last_tag=A->random_tag;
3017 free_int (cons1,-1);free_int (cons2,-1);
3018 cons1=sub_aln2count_mat2 (A, ns1, list1);
3019 cons2=sub_aln2count_mat2 (A, ns2, list2);
3021 score=cw_profile_profile (cons1[col1], cons2[col2], CL)-SCORE_K*CL->nomatch;
3025 int cw_profile_get_dp_cost_window ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL)
3027 static int last_tag;
3028 static int **cons1, **cons2;
3029 int a, score, window_size=5, n, p1, p2;
3032 if (last_tag!=A->random_tag)
3034 last_tag=A->random_tag;
3035 free_int (cons1,-1);free_int (cons2,-1);
3036 cons1=sub_aln2count_mat2 (A, ns1, list1);
3037 cons2=sub_aln2count_mat2 (A, ns2, list2);
3040 for (n=0,score=0,a=0; a<window_size; a++)
3044 if ( p1<0 || cons1[p1][0]==END_ARRAY)continue;
3045 if ( p2<0 || cons2[p2][0]==END_ARRAY)continue;
3046 score+=cw_profile_profile (cons1[col1], cons2[col2], CL)-SCORE_K*CL->nomatch;
3054 int consensus_get_dp_cost ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL)
3056 static int last_tag;
3057 static char *seq1, *seq2;
3060 /*Works only with matrix*/
3061 if (last_tag !=A->random_tag)
3063 int ls1, ls2, lenls1, lenls2;
3065 last_tag=A->random_tag;
3066 vfree (seq1);vfree (seq2);
3067 seq1=sub_aln2cons_seq_mat (A, ns1, list1, "blosum62mt");
3068 seq2=sub_aln2cons_seq_mat (A, ns2, list2, "blosum62mt");
3069 ls1=list1[ns1-1];ls2=list2[ns2-1];
3070 lenls1=(int)strlen (A->seq_al[ls1]); lenls2=(int)strlen (A->seq_al[ls2]);
3073 return (CL->M[seq1[col1]-'A'][seq2[col2]-'A']*SCORE_K)-SCORE_K*CL->nomatch;
3076 int fast_get_dp_cost_quadruplet ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL)
3078 /*WARNING: WORKS ONLY WITH List to Extend*/
3079 /*That function does a quruple extension beween two columns by pooling the residues together*/
3087 int s1, rs1, r1, t_r, t_s,t_w, q_r, q_s, q_w, s2, rs2, r2;
3088 int **buf_pos, buf_ns, *buf_list, buf_col;
3090 static int **hasch1;
3091 static int **hasch2;
3093 static int **n_hasch1;
3094 static int **n_hasch2;
3096 static int **is_in_col1;
3097 static int **is_in_col2;
3118 CL=index_res_constraint_list ( CL, WE);
3122 hasch1=declare_int( (CL->S)->nseq, (CL->S)->max_len+1);
3123 hasch2=declare_int( (CL->S)->nseq, (CL->S)->max_len+1);
3124 n_hasch1=declare_int ( (CL->S)->nseq, (CL->S)->max_len+1);
3125 n_hasch2=declare_int( (CL->S)->nseq, (CL->S)->max_len+1);
3126 is_in_col1=declare_int( (CL->S)->nseq, (CL->S)->max_len+1);
3127 is_in_col2=declare_int( (CL->S)->nseq, (CL->S)->max_len+1);
3130 for ( a=0; a< ns1; a++)
3133 s1=A->order[rs1][0];
3139 is_in_col1[s1][r1]=1;
3140 for (b=1; b< CL->residue_index[s1][r1][0]; b+=3)
3142 t_s=CL->residue_index[s1][r1][b];
3143 t_r=CL->residue_index[s1][r1][b+1];
3144 t_w=CL->residue_index[s1][r1][b+2];
3145 for ( c=1; c< CL->residue_index[t_s][t_r][0]; c+=3)
3147 q_s=CL->residue_index[t_s][t_r][c];
3148 q_r=CL->residue_index[t_s][t_r][c+1];
3149 q_w=CL->residue_index[t_s][t_r][c+2];
3150 hasch1[q_s][q_r]+=MIN(q_w, t_w);
3151 n_hasch1[q_s][q_r]++;
3157 for ( a=0; a< ns2; a++)
3160 s2=A->order[rs2][0];
3166 is_in_col2[s2][r2]=1;
3167 for (b=1; b< CL->residue_index[s2][r2][0]; b+=3)
3169 t_s=CL->residue_index[s2][r2][b];
3170 t_r=CL->residue_index[s2][r2][b+1];
3171 t_w=CL->residue_index[s2][r2][b+2];
3172 for ( c=1; c< CL->residue_index[t_s][t_r][0]; c+=3)
3174 q_s=CL->residue_index[t_s][t_r][c];
3175 q_r=CL->residue_index[t_s][t_r][c+1];
3176 q_w=CL->residue_index[t_s][t_r][c+2];
3177 hasch2[q_s][q_r]+=MIN(t_w, q_w);
3178 n_hasch2[q_s][q_r]++;
3185 for ( a=0; a< ns2; a++)
3188 s2=A->order[rs2][0];
3194 for (b=1; b< CL->residue_index[s2][r2][0]; b+=3)
3196 t_s=CL->residue_index[s2][r2][b];
3197 t_r=CL->residue_index[s2][r2][b+1];
3199 for ( c=1; c< CL->residue_index[t_s][t_r][0]; c+=3)
3201 q_s=CL->residue_index[t_s][t_r][c];
3202 q_r=CL->residue_index[t_s][t_r][c+1];
3203 if ( hasch2[q_s][q_r] && hasch1[q_s][q_r]&& !(is_in_col1[q_s][q_r] || is_in_col2[q_s][q_r]))
3205 score+=MIN(hasch2[q_s][q_r]*(n_hasch1[q_s][q_r]),hasch1[q_s][q_r]*(n_hasch2[q_s][q_r]));
3207 else if ( hasch2[q_s][q_r] && is_in_col1[q_s][q_r])
3209 score+=hasch2[q_s][q_r]*(n_hasch1[q_s][q_r]+1);
3211 else if (hasch1[q_s][q_r] && is_in_col2[q_s][q_r])
3213 score+=hasch1[q_s][q_r]*(n_hasch2[q_s][q_r]+1);
3216 n_hasch2[q_s][q_r]=0;
3220 is_in_col2[s2][r2]=0;
3225 for ( a=0; a< ns1; a++)
3228 s1=A->order[rs1][0];
3234 is_in_col1[s1][r1]=0;
3236 for (b=1; b< CL->residue_index[s1][r1][0]; b+=3)
3238 t_s=CL->residue_index[s1][r1][b];
3239 t_r=CL->residue_index[s1][r1][b+1];
3240 for ( c=1; c< CL->residue_index[t_s][t_r][0]; c+=3)
3242 q_s=CL->residue_index[t_s][t_r][c];
3243 q_r=CL->residue_index[t_s][t_r][c+1];
3245 n_hasch1[q_s][q_r]=0;
3252 score=(score*SCORE_K)/((ns1-n_gap1)*(ns2-n_gap2));
3253 score=(CL->normalise)?((score*CL->normalise)/CL->max_ext_value):score;
3255 return (int)(score-SCORE_K*CL->nomatch);
3259 int fast_get_dp_cost ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL)
3261 /*WARNING: WORKS ONLY WITH List to Extend*/
3269 int s1, rs1, r1, t_r, t_s, s2, rs2, r2;
3270 static int **hasch1;
3271 static int **hasch2;
3273 static int **n_hasch1;
3274 static int **n_hasch2;
3276 static int **is_in_col1;
3277 static int **is_in_col2;
3281 CL=index_res_constraint_list ( CL, WE);
3285 hasch1=declare_int( (CL->S)->nseq, (CL->S)->max_len+1);
3286 hasch2=declare_int( (CL->S)->nseq, (CL->S)->max_len+1);
3287 n_hasch1=declare_int ( (CL->S)->nseq, (CL->S)->max_len+1);
3288 n_hasch2=declare_int( (CL->S)->nseq, (CL->S)->max_len+1);
3289 is_in_col1=declare_int( (CL->S)->nseq, (CL->S)->max_len+1);
3290 is_in_col2=declare_int( (CL->S)->nseq, (CL->S)->max_len+1);
3293 for ( a=0; a< ns1; a++)
3296 s1=A->order[rs1][0];
3302 is_in_col1[s1][r1]=1;
3303 for (b=1; b< CL->residue_index[s1][r1][0]; b+=3)
3305 t_s=CL->residue_index[s1][r1][b];
3306 t_r=CL->residue_index[s1][r1][b+1];
3307 hasch1[t_s][t_r]+=CL->residue_index[s1][r1][b+2];
3308 n_hasch1[t_s][t_r]++;
3314 for ( a=0; a< ns2; a++)
3317 s2=A->order[rs2][0];
3323 is_in_col2[s2][r2]=1;
3324 for (b=1; b< CL->residue_index[s2][r2][0]; b+=3)
3326 t_s=CL->residue_index[s2][r2][b];
3327 t_r=CL->residue_index[s2][r2][b+1];
3329 hasch2[t_s][t_r]+=CL->residue_index[s2][r2][b+2];
3330 n_hasch2[t_s][t_r]++;
3338 for ( a=0; a< ns2; a++)
3341 s2=A->order[rs2][0];
3347 for (b=1; b< CL->residue_index[s2][r2][0]; b+=3)
3349 t_s=CL->residue_index[s2][r2][b];
3350 t_r=CL->residue_index[s2][r2][b+1];
3352 if ( hasch2[t_s][t_r] && hasch1[t_s][t_r]&& !(is_in_col1[t_s][t_r] || is_in_col2[t_s][t_r]))
3354 score+=MIN(hasch2[t_s][t_r]*(n_hasch1[t_s][t_r]),hasch1[t_s][t_r]*(n_hasch2[t_s][t_r]));
3356 else if ( hasch2[t_s][t_r] && is_in_col1[t_s][t_r])
3358 score+=hasch2[t_s][t_r]*(n_hasch1[t_s][t_r]+1);
3360 else if (hasch1[t_s][t_r] && is_in_col2[t_s][t_r])
3362 score+=hasch1[t_s][t_r]*(n_hasch2[t_s][t_r]+1);
3365 n_hasch2[t_s][t_r]=0;
3368 is_in_col2[s2][r2]=0;
3373 for ( a=0; a< ns1; a++)
3376 s1=A->order[rs1][0];
3382 is_in_col1[s1][r1]=0;
3384 for (b=1; b< CL->residue_index[s1][r1][0]; b+=3)
3386 t_s=CL->residue_index[s1][r1][b];
3387 t_r=CL->residue_index[s1][r1][b+1];
3390 n_hasch1[t_s][t_r]=0;
3397 for ( a=0; a< ns1; a++)
3400 s1=A->order[rs1][0];
3406 for (b=1; b< CL->residue_index[s1][r1][0]; b+=3)
3408 t_s=CL->residue_index[s1][r1][b];
3409 t_r=CL->residue_index[s1][r1][b+1];
3411 if ( hasch1[t_s][t_r] && hasch2[t_s][t_r]&& !(is_in_col2[t_s][t_r] || is_in_col1[t_s][t_r]))
3413 score+=MIN(hasch1[t_s][t_r]*(n_hasch2[t_s][t_r]),hasch2[t_s][t_r]*(n_hasch1[t_s][t_r]));
3415 else if ( hasch1[t_s][t_r] && is_in_col2[t_s][t_r])
3417 score+=hasch1[t_s][t_r]*(n_hasch2[t_s][t_r]+1);
3419 else if (hasch2[t_s][t_r] && is_in_col1[t_s][t_r])
3421 score+=hasch2[t_s][t_r]*(n_hasch1[t_s][t_r]+1);
3424 n_hasch1[t_s][t_r]=0;
3427 is_in_col1[s1][r1]=0;
3432 for ( a=0; a< ns2; a++)
3435 s2=A->order[rs2][0];
3441 is_in_col2[s2][r2]=0;
3443 for (b=1; b< CL->residue_index[s2][r2][0]; b+=3)
3445 t_s=CL->residue_index[s2][r2][b];
3446 t_r=CL->residue_index[s2][r2][b+1];
3449 n_hasch2[t_s][t_r]=0;
3454 score=(score*SCORE_K)/((ns1-n_gap1)*(ns2-n_gap2));
3455 score=(CL->normalise)?((score*CL->normalise)/CL->max_ext_value):score;
3457 return (int)(score-SCORE_K*CL->nomatch);
3460 int fast_get_dp_cost_2 ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL)
3464 int a, b, s1, s2,r1, r2;
3470 static int last_ns1;
3471 static int last_ns2;
3472 static int last_top1;
3473 static int last_top2;
3474 static int **check_list;
3477 /*New heuristic get dp_cost on a limited number of pairs*/
3478 /*This is the current default*/
3481 if ( last_ns1==ns1 && last_top1==list1[0] && last_ns2==ns2 && last_top2==list2[0]);
3490 if ( check_list) free_int (check_list, -1);
3491 check_list=declare_int ( (CL->S)->nseq*(CL->S)->nseq, 3);
3493 for ( n_pair=0,a=0; a< ns1; a++)
3496 rs1=A->order[s1][0];
3497 for ( b=0; b< ns2; b++, n_pair++)
3500 rs2=A->order[s2][0];
3501 check_list[n_pair][0]=s1;
3502 check_list[n_pair][1]=s2;
3503 check_list[n_pair][2]=(!CL->DM)?0:(CL->DM)->similarity_matrix[rs1][rs2];
3505 sort_int ( check_list, 3, 2, 0, n_pair-1);
3509 if ( !CL->evaluate_residue_pair){fprintf ( stderr, "\nWARNING: CL->evaluate_residue_pair Not set\nSet to: extend_residue_pair\n");CL->evaluate_residue_pair= extend_residue_pair;}
3512 for ( a=n_pair-1; a>=0; a--)
3514 s1= check_list[a][0];
3515 rs1=A->order[s1][0];
3518 s2= check_list[a][1];
3519 rs2=A->order[s2][0];
3530 if ((s=(CL->evaluate_residue_pair)(CL, rs1, r1, rs2, r2))!=UNDEFINED) score+=s;
3536 if ( res_res>=CL->max_n_pair && CL->max_n_pair!=0)a=0;
3539 score=(res_res==0)?0:( (score)/res_res);
3540 score=score-SCORE_K*CL->nomatch;
3545 int fast_get_dp_cost_3 ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL)
3547 static int last_tag;
3548 static Constraint_list *NCL;
3551 if ( ns1==1 && ns2==1)
3553 return slow_get_dp_cost( A,pos1, ns1,list1, col1, pos2, ns2, list2, col2,CL);
3556 if ( last_tag !=A->random_tag)
3560 last_tag=A->random_tag;
3561 ns=vcalloc (2, sizeof (int));ns[0]=ns1; ns[1]=ns2;
3562 ls=vcalloc (2, sizeof (int*));ls[0]=list1; ls[1]=list2;
3564 NCL=progressive_index_res_constraint_list ( A, ns, ls, CL);
3565 vfree (ls); vfree (ns);
3567 score=residue_pair_extended_list ( NCL,list1[0],col1, list2[0], col2);
3568 score=(CL->normalise)?((score*CL->normalise)/CL->max_ext_value):score;
3569 score=(score-SCORE_K*CL->nomatch);
3575 int slow_get_dp_cost ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL)
3579 int a, b, s1, s2,r1, r2;
3585 static int last_tag;
3588 if (col1<0 || col2<0) return 0;
3589 if ( last_tag !=A->random_tag)
3591 last_tag=A->random_tag;
3594 dummy=vcalloc (10, sizeof(int));
3595 dummy[0]=1;/*Number of Amino acid types on colum*/
3596 dummy[1]=5;/*Length of Dummy*/
3597 dummy[3]='\0';/*Amino acid*/
3598 dummy[4]=1; /*Number of occurences*/
3599 dummy[5]=100; /*Frequency in the MSA column*/
3603 if ( !CL->evaluate_residue_pair){fprintf ( stderr, "\nWARNING: CL->evaluate_residue_pair Not set\nSet to: extend_residue_pair\n");CL->evaluate_residue_pair= extend_residue_pair;}
3605 for ( a=0; a< ns1; a++)
3607 for ( b=0; b<ns2; b++)
3611 rs1=A->order[s1][0];
3615 rs2=A->order[s2][0];
3624 if (r1==0 && r2==0)gap_gap++;
3625 else if ( r1<0 || r2<0) gap_res++;
3629 if ((s=(CL->evaluate_residue_pair)(CL, rs1, r1, rs2, r2))!=UNDEFINED) score+=s;
3641 score=(res_res==0)?0:( (score)/res_res);
3643 return score-SCORE_K*CL->nomatch;
3646 int slow_get_dp_cost_pc ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL)
3650 int a, b, s1, s2,r1, r2;
3656 static int last_tag;
3659 if (col1<0 || col2<0) return 0;
3660 if ( last_tag !=A->random_tag)
3662 last_tag=A->random_tag;
3665 dummy=vcalloc (10, sizeof(int));
3666 dummy[0]=1;/*Number of Amino acid types on colum*/
3667 dummy[1]=5;/*Length of Dummy*/
3668 dummy[3]='\0';/*Amino acid*/
3669 dummy[4]=1; /*Number of occurences*/
3670 dummy[5]=100; /*Frequency in the MSA column*/
3674 if ( !CL->evaluate_residue_pair){fprintf ( stderr, "\nWARNING: CL->evaluate_residue_pair Not set\nSet to: extend_residue_pair\n");CL->evaluate_residue_pair= extend_residue_pair;}
3676 for ( a=0; a< ns1; a++)
3678 for ( b=0; b<ns2; b++)
3682 rs1=A->order[s1][0];
3686 rs2=A->order[s2][0];
3695 if (r1==0 && r2==0)gap_gap++;
3696 else if ( r1<0 || r2<0) gap_res++;
3700 if ((s=residue_pair_extended_list_pc(CL, rs1, r1, rs2, r2))!=UNDEFINED) score+=s;
3712 score=(res_res==0)?0:( (score)/res_res);
3717 int slow_get_dp_cost_test ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL)
3721 int a, b, s1, s2,r1, r2;
3722 int gap_gap=0, gap_res=0, res_res=0, rs1, rs2;
3724 for ( a=0; a< ns1; a++)
3726 for ( b=0; b<ns2; b++)
3730 rs1=A->order[s1][0];
3734 rs2=A->order[s2][0];
3743 if (r1==0 && r2==0)gap_gap++;
3744 else if ( r1<0 || r2<0) gap_res++;
3748 score+=residue_pair_extended_list_raw (CL, rs1, r1, rs2, r2);
3753 return (int)(score*10)/(ns1*ns2);
3756 int sw_get_dp_cost ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL)
3765 for ( a=0; a< ns1; a++)
3768 rs1=A->order[s1][0];
3770 if ( r1<=0)continue;
3771 for ( b=0; b< ns2; b++)
3776 rs2=A->order[s2][0];
3782 if (sw_pair_is_defined (CL, rs1, r1, rs2, r2)==UNDEFINED)return UNDEFINED;
3786 return slow_get_dp_cost ( A, pos1, ns1, list1, col1, pos2, ns2, list2,col2, CL);
3797 int get_domain_dp_cost ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2,Constraint_list *CL , int scale , int gop, int gep)
3800 int a, b, s1, s2,r1, r2;
3808 int flag_list_is_aa_sub_mat=0;
3811 /*Needs to be cleanned After Usage*/
3815 if ( entry==NULL) entry=vcalloc (LIST_N_FIELDS , sizeof (int));
3817 for (a=0; a< ns1; a++)
3820 rs1=A->order[s1][0];
3821 for ( b=0; b<ns2; b++)
3824 rs2=A->order[s2][0];
3828 r1=entry[R1]=pos1[s1][col1];
3829 r2=entry[R2]=pos2[s2][col2];
3831 if ( !flag_list_is_aa_sub_mat)
3833 if ( r1==r2 && rs1==rs2)
3838 else if (r1==0 && r2==0)
3842 else if ( r1<=0 || r2<=0)
3846 else if ((r=main_search_in_list_constraint ( entry,&p,4,CL))!=NULL)
3850 if (r[WE]!=UNDEFINED)
3852 score+=(r[WE]*SCORE_K)+scale;
3856 fprintf ( stderr, "**");
3864 score=((res_res+gap_res)==0)?0:score/(res_res+gap_res);
3868 /*********************************************************************************************/
3870 /* FUNCTIONS FOR ANALYSING AL OR MATRIX */
3872 /*********************************************************************************************/
3874 int aln2n_res ( Alignment *A, int start, int end)
3879 for ( a=start; a<end; a++)for ( b=0; b< A->nseq; b++)score+=!is_gap(A->seq_al[b][a]);
3883 float get_gop_scaling_factor ( int **matrix,float id, int l1, int l2)
3885 return id* get_avg_matrix_mm(matrix, AA_ALPHABET);
3888 float get_avg_matrix_mm ( int **matrix, char *alphabet)
3896 l=MIN(20,(int)strlen (alphabet));
3897 for (naa=0, gop=0,a=0; a<l; a++)
3898 for ( b=0; b<l; b++)
3902 gop+=matrix[alphabet[a]-'A'][alphabet[b]-'A'];
3910 float get_avg_matrix_match ( int **matrix, char *alphabet)
3919 l=MIN(20,(int)strlen (alphabet));
3920 for (gop=0,a=0; a<l; a++)
3921 gop+=matrix[alphabet[a]-'A'][alphabet[a]-'A'];
3927 float get_avg_matrix_diff ( int **matrix1,int **matrix2, char *alphabet)
3937 l=MIN(20,(int)strlen (alphabet));
3938 for (naa=0, gop=0,a=0; a<l; a++)
3941 v1=matrix1[alphabet[a]-'A'][alphabet[a]-'A']-matrix2[alphabet[b]-'A'][alphabet[b]-'A'];
3950 float* set_aa_frequencies ()
3954 /*frequencies tqken from psw*/
3956 frequency=vcalloc (100, sizeof (float));
3957 frequency ['x'-'A']=0.0013;
3958 frequency ['a'-'A']=0.0076;
3959 frequency ['c'-'A']=0.0176;
3960 frequency ['d'-'A']=0.0529;
3961 frequency ['e'-'A']=0.0628;
3962 frequency ['f'-'A']=0.0401;
3963 frequency ['g'-'A']=0.0695;
3964 frequency ['h'-'A']=0.0224;
3965 frequency ['i'-'A']=0.0561;
3966 frequency ['k'-'A']=0.0584;
3967 frequency ['l'-'A']=0.0922;
3968 frequency ['m'-'A']=0.0236;
3969 frequency ['n'-'A']=0.0448;
3970 frequency ['p'-'A']=0.0500;
3971 frequency ['q'-'A']=0.0403;
3972 frequency ['r'-'A']=0.0523;
3973 frequency ['s'-'A']=0.0715;
3974 frequency ['t'-'A']=0.0581;
3975 frequency ['v'-'A']=0.0652;
3976 frequency ['w'-'A']=0.0128;
3977 frequency ['y'-'A']=0.0321;
3978 frequency ['X'-'A']=0.0013;
3979 frequency ['A'-'A']=0.0076;
3980 frequency ['C'-'A']=0.0176;
3981 frequency ['D'-'A']=0.0529;
3982 frequency ['E'-'A']=0.0628;
3983 frequency ['F'-'A']=0.0401;
3984 frequency ['G'-'A']=0.0695;
3985 frequency ['H'-'A']=0.0224;
3986 frequency ['I'-'A']=0.0561;
3987 frequency ['J'-'A']=0.0584;
3988 frequency ['L'-'A']=0.0922;
3989 frequency ['M'-'A']=0.0236;
3990 frequency ['N'-'A']=0.0448;
3991 frequency ['P'-'A']=0.0500;
3992 frequency ['Q'-'A']=0.0403;
3993 frequency ['R'-'A']=0.0523;
3994 frequency ['S'-'A']=0.0715;
3995 frequency ['T'-'A']=0.0581;
3996 frequency ['V'-'A']=0.0652;
3997 frequency ['W'-'A']=0.0128;
3998 frequency ['Y'-'A']=0.0321;
4002 float measure_matrix_pos_avg (int **matrix,char *alphabet)
4008 for ( tot=0,a=0; a< 20; a++)
4009 for ( b=0; b<20; b++)
4011 if (matrix[alphabet[a]-'A'][alphabet[b]-'A']>=0){naa++;tot+=matrix[alphabet[a]-'A'][alphabet[b]-'A'];}
4017 float measure_matrix_enthropy (int **matrix,char *alphabet)
4021 double s, p, q, h=0, tq=0;
4024 /*frequencies tqken from psw*/
4026 frequency=set_aa_frequencies ();
4029 lambda=compute_lambda(matrix,alphabet);
4030 fprintf ( stderr, "\nLambda=%f", (float)lambda);
4032 for ( a=0; a< 20; a++)
4033 for ( b=0; b<=a; b++)
4035 s=matrix[alphabet[a]-'A'][alphabet[b]-'A'];
4038 p=frequency[alphabet[a]-'A']*frequency[alphabet[b]-'A'];
4042 q=exp(lambda*s+log(p));
4045 h+=q*log(q/p)*log(2);
4049 fprintf ( stderr,"\ntq=%f\n", (float)tq);
4053 float compute_lambda (int **matrix,char *alphabet)
4057 double lambda, best_lambda=0, delta, best_delta=0, p, tq,s;
4058 static float *frequency;
4060 if ( !frequency)frequency=set_aa_frequencies ();
4062 for ( lambda=0.001; lambda<1; lambda+=0.005)
4065 for ( a=0; a< 20; a++)
4066 for ( b=0; b<20; b++)
4068 p=frequency[alphabet[a]-'A']*frequency[alphabet[b]-'A'];
4069 s=matrix[alphabet[a]-'A'][alphabet[b]-'A'];
4070 tq+=exp(lambda*s+log(p));
4080 if (delta<best_delta)
4087 fprintf ( stderr, "\n%f %f ", lambda, tq);
4090 fprintf ( stderr, "\nRESULT: %f %f ", best_lambda, best_delta);
4091 return (float) best_lambda;
4096 float evaluate_random_match (char *mat, int n, int len,char *alp)
4099 matrix=read_matrice ( mat);
4100 fprintf ( stderr, "Matrix=%15s ", mat);
4101 return evaluate_random_match2 (matrix, n,len,alp);
4105 float evaluate_random_match2 (int **matrix, int n, int len,char *alp)
4107 int a, b, c, d, c1, c2, tot;
4110 float score_random=0;
4120 freq=set_aa_frequencies ();
4121 list=vcalloc ( 10000, sizeof (char));
4124 for (tot=0,c=0,a=0;a<20; a++)
4126 b=freq[alp[a]-'A']*1000;
4128 for (d=0; d<b; d++, c++)
4135 for (a=0; a< len*n; a++)
4139 score_random+=matrix[list[c1]-'A'][list[c2]-'A'];
4140 score_id+=matrix[list[c1]-'A'][list[c1]-'A'];
4142 while (tot_len< len*n)
4147 if ( matrix[list[c1]-'A'][list[c2]-'A']>=0){score_good+=matrix[list[c1]-'A'][list[c2]-'A']; tot_len++;}
4151 score_random=score_random/tot_len;
4152 score_id=score_id/tot_len;
4153 score_good=score_good/tot_len;
4155 fprintf ( stderr, "Random=%8.3f Id=%8.3f Good=%8.3f [%7.2f]\n",score_random, score_id, score_good, tot_len/tot_try);
4157 return score_random;
4159 float compare_two_mat (char *mat1,char*mat2, int n, int len,char *alp)
4161 int **matrix1, **matrix2;
4163 evaluate_random_match (mat1, n, len,alp);
4164 evaluate_random_match (mat2, n, len,alp);
4165 matrix1=read_matrice ( mat1);
4166 matrix2=read_matrice ( mat2);
4167 matrix1=rescale_matrix(matrix1, 10, alp);
4168 matrix2=rescale_matrix(matrix2, 10, alp);
4169 compare_two_mat_array(matrix1,matrix2, n, len,alp);
4174 int ** rescale_two_mat (char *mat1,char*mat2, int n, int len,char *alp)
4177 int **matrix1, **matrix2;
4179 lambda=measure_lambda2 (mat1, mat2, n, len, alp)*10;
4181 fprintf ( stderr, "\nLambda=%.2f", lambda);
4182 matrix2=read_matrice(mat2);
4183 matrix2=neg_matrix2pos_matrix(matrix2);
4184 matrix2=rescale_matrix( matrix2, lambda,"abcdefghiklmnpqrstvwxyz");
4186 matrix1=read_matrice(mat1);
4187 matrix1=neg_matrix2pos_matrix(matrix1);
4188 matrix1=rescale_matrix( matrix1,10,"abcdefghiklmnpqrstvwxyz");
4190 output_matrix_header ( "stdout", matrix2, alp);
4191 evaluate_random_match2(matrix1, 1000, 100, alp);
4192 evaluate_random_match2(matrix2, 1000, 100, alp);
4193 compare_two_mat_array(matrix1,matrix2, n, len,alp);
4197 float measure_lambda2(char *mat1,char*mat2, int n, int len,char *alp)
4202 m1=read_matrice (mat1);
4203 m2=read_matrice (mat2);
4205 m1=neg_matrix2pos_matrix(m1);
4206 m2=neg_matrix2pos_matrix(m2);
4208 f1=measure_matrix_pos_avg( m1, alp);
4209 f2=measure_matrix_pos_avg( m2, alp);
4215 float measure_lambda (char *mat1,char*mat2, int n, int len,char *alp)
4218 int **matrix1, **matrix2, **mat;
4220 float best_quality=0, quality=0, best_lambda=0;
4222 matrix1=read_matrice ( mat1);
4223 matrix2=read_matrice ( mat2);
4224 matrix1=rescale_matrix(matrix1, 10, alp);
4225 matrix2=rescale_matrix(matrix2, 10, alp);
4227 for (c=0, a=0.1; a< 2; a+=0.05)
4229 fprintf ( stderr, "Lambda=%.2f\n", a);
4230 mat=duplicate_int (matrix2,-1,-1);
4231 mat=rescale_matrix(mat, a, alp);
4232 quality=compare_two_mat_array(matrix1,mat, n, len,alp);
4233 quality=MAX((-quality),quality);
4235 if (c==0 || (best_quality>quality))
4238 fprintf ( stderr, "*");
4239 best_quality=quality;
4244 evaluate_random_match2(mat, 1000, 100, alp);
4245 evaluate_random_match2(matrix1, 1000, 100, alp);
4253 float compare_two_mat_array (int **matrix1,int **matrix2, int n, int len,char *alp)
4255 int a, b, c, d, c1, c2, tot;
4258 float delta_random=0;
4259 float delta2_random=0;
4265 float delta2_good=0;
4277 freq=set_aa_frequencies ();
4278 list=vcalloc ( 10000, sizeof (char));
4281 for (tot=0,c=0,a=0;a<20; a++)
4283 b=freq[alp[a]-'A']*1000;
4285 for (d=0; d<b; d++, c++)
4292 for (a=0; a< len*n; a++)
4296 delta=matrix1[list[c1]-'A'][list[c2]-'A']-matrix2[list[c1]-'A'][list[c2]-'A'];
4297 delta_random+=delta;
4298 delta2_random+=MAX(delta,(-delta));
4300 delta=matrix1[list[c1]-'A'][list[c1]-'A']-matrix2[list[c1]-'A'][list[c1]-'A'];
4302 delta2_id+=MAX(delta,(-delta));
4304 while (tot_len< len*n)
4309 if ( matrix1[list[c1]-'A'][list[c2]-'A']>=0 || matrix2[list[c1]-'A'][list[c2]-'A'] )
4311 delta=matrix1[list[c1]-'A'][list[c2]-'A']-matrix2[list[c1]-'A'][list[c2]-'A'];
4313 delta2_good+=MAX(delta,(-delta));
4319 delta_random=delta_random/tot_len;
4320 delta2_random=delta2_random/tot_len;
4323 delta_id=delta_id/tot_len;
4324 delta2_id=delta2_id/tot_len;
4326 delta_good=delta_good/tot_len;
4327 delta2_good=delta2_good/tot_len;
4330 fprintf ( stderr, "\tRand=%8.3f %8.3f\n\tId =%8.3f %8.3f\n\tGood=%8.3f %8.3f\n",delta_random, delta2_random, delta_id,delta2_id, delta_good,delta2_good);
4337 int ** rescale_matrix ( int **matrix, float lambda, char *alp)
4342 for ( a=0; a< 20; a++)
4343 for ( b=0; b< 20; b++)
4345 matrix[alp[a]-'A'][alp[b]-'A']= matrix[alp[a]-'A'][alp[b]-'A']*lambda;
4349 int **mat2inverted_mat (int **matrix, char *alp)
4351 int a, b, min, max, v,l;
4354 l=(int)strlen (alp);
4355 min=max=matrix[alp[0]-'A'][alp[0]-'A'];
4356 for ( a=0; a<l; a++)
4357 for ( b=0; b< l; b++)
4359 v=matrix[alp[a]-'A'][alp[b]-'A'];
4363 for ( a=0; a<l; a++)
4364 for ( b=0; b< l; b++)
4366 v=matrix[alp[a]-'A'][alp[b]-'A'];
4369 c1=tolower(alp[a])-'A';
4370 c2=tolower(alp[b])-'A';
4372 C1=toupper(alp[a])-'A';
4373 C2=toupper(alp[b])-'A';
4374 matrix[C1][C2]=matrix[c1][c2]=matrix[C1][c2]=matrix[c1][C2]=v;
4378 void output_matrix_header ( char *name, int **matrix, char *alp)
4384 nalp=vcalloc ( 1000, sizeof (char));
4388 sprintf ( nalp, "ABCDEFGHIKLMNPQRSTVWXYZ");
4389 l=(int)strlen (nalp);
4391 fp=vfopen ( name, "w");
4392 fprintf (fp, "\nint []={\n");
4396 for ( b=0; b<=a; b++)
4397 fprintf ( fp, "%3d, ",matrix[nalp[a]-'A'][nalp[b]-'A']);
4398 fprintf ( fp, "\n");
4400 fprintf (fp, "};\n");
4406 float ** initialise_aa_physico_chemical_property_table (int *n)
4416 fp=vfopen ("properties.txt", "r");
4417 while ((c=fgetc(fp))!=EOF)n[0]+=(c=='#');
4422 p=declare_float ( n[0], 'z'+1);
4423 fp=vfopen ("properties.txt", "r");
4425 while ((c=fgetc(fp))!=EOF)
4427 if (c=='#'){in=0;while (fgetc(fp)!='\n');}
4430 if ( !in){in=1; ++n[0];}
4431 fscanf (fp, "%f %c %*s",&v1, &c1 );
4432 p[n[0]-1][tolower(c1)]=v1;
4433 while ( (c=fgetc(fp))!='\n');
4438 /*rescale so that Delta max=10*/
4439 for (a=0; a<n[0]; a++)
4442 for (b='a'; b<='z'; b++)
4444 min=(p[a][b]<min)?p[a][b]:min;
4445 max=(p[a][b]>max)?p[a][b]:max;
4447 for (b='a'; b<='z'; b++)
4449 p[a][b]=((p[a][b]-min)/(max-min))*10;
4456 Constraint_list * choose_extension_mode ( char *extend_mode, Constraint_list *CL)
4460 fprintf ( stderr, "\nWarning: CL was not set");
4463 else if ( strm ( extend_mode, "rna0"))
4465 CL->evaluate_residue_pair=residue_pair_extended_list;
4466 CL->get_dp_cost =slow_get_dp_cost;
4468 else if ( strm ( extend_mode, "rna1") || strm (extend_mode, "rna"))
4470 CL->evaluate_residue_pair=residue_pair_extended_list4rna1;
4471 CL->get_dp_cost =slow_get_dp_cost;
4473 else if ( strm ( extend_mode, "rna2"))
4475 CL->evaluate_residue_pair=residue_pair_extended_list4rna2;
4476 CL->get_dp_cost =slow_get_dp_cost;
4478 else if ( strm ( extend_mode, "rna3"))
4480 CL->evaluate_residue_pair=residue_pair_extended_list4rna3;
4481 CL->get_dp_cost =slow_get_dp_cost;
4483 else if ( strm ( extend_mode, "rna4"))
4485 CL->evaluate_residue_pair=residue_pair_extended_list4rna4;
4486 CL->get_dp_cost =slow_get_dp_cost;
4488 else if ( strm ( extend_mode, "pc") && !CL->M)
4490 CL->evaluate_residue_pair=residue_pair_extended_list_pc;
4491 CL->get_dp_cost =slow_get_dp_cost;
4493 else if ( strm ( extend_mode, "triplet") && !CL->M)
4495 CL->evaluate_residue_pair=residue_pair_extended_list;
4496 CL->get_dp_cost =get_dp_cost;
4498 else if ( strm ( extend_mode, "relative_triplet") && !CL->M)
4500 CL->evaluate_residue_pair=residue_pair_relative_extended_list;
4501 CL->get_dp_cost =fast_get_dp_cost_2;
4503 else if ( strm ( extend_mode, "g_coffee") && !CL->M)
4505 CL->evaluate_residue_pair=residue_pair_extended_list_g_coffee;
4506 CL->get_dp_cost =slow_get_dp_cost;
4508 else if ( strm ( extend_mode, "g_coffee_quadruplets") && !CL->M)
4510 CL->evaluate_residue_pair=residue_pair_extended_list_g_coffee_quadruplet;
4511 CL->get_dp_cost =slow_get_dp_cost;
4513 else if ( strm ( extend_mode, "fast_triplet") && !CL->M)
4515 CL->evaluate_residue_pair=residue_pair_extended_list;
4516 CL->get_dp_cost =fast_get_dp_cost;
4518 else if ( strm ( extend_mode, "test_triplet") && !CL->M)
4520 CL->evaluate_residue_pair=residue_pair_extended_list;
4521 CL->get_dp_cost =fast_get_dp_cost_3;
4523 else if ( strm ( extend_mode, "very_fast_triplet") && !CL->M)
4525 CL->evaluate_residue_pair=residue_pair_extended_list;
4526 CL->get_dp_cost =fast_get_dp_cost_2;
4528 else if ( strm ( extend_mode, "slow_triplet") && !CL->M)
4530 CL->evaluate_residue_pair=residue_pair_extended_list;
4531 CL->get_dp_cost =slow_get_dp_cost;
4533 else if ( strm ( extend_mode, "mixt") && !CL->M)
4535 CL->evaluate_residue_pair=residue_pair_extended_list_mixt;
4536 CL->get_dp_cost=slow_get_dp_cost;
4538 else if ( strm ( extend_mode, "quadruplet") && !CL->M)
4540 CL->evaluate_residue_pair=residue_pair_extended_list_quadruplet;
4541 CL->get_dp_cost =get_dp_cost_quadruplet;
4543 else if ( strm ( extend_mode, "test") && !CL->M)
4545 CL->evaluate_residue_pair=residue_pair_test_function;
4546 CL->get_dp_cost =slow_get_dp_cost_test;
4548 else if ( strm ( extend_mode, "ssp"))
4551 CL->evaluate_residue_pair=evaluate_ssp_matrix_score;
4552 CL->get_dp_cost=slow_get_dp_cost;
4555 else if ( strm ( extend_mode, "tm"))
4558 CL->evaluate_residue_pair=evaluate_tm_matrix_score;
4559 CL->get_dp_cost=slow_get_dp_cost;
4562 else if ( strm ( extend_mode, "matrix"))
4565 CL->evaluate_residue_pair=evaluate_matrix_score;
4566 CL->get_dp_cost=cw_profile_get_dp_cost;
4569 else if ( strm ( extend_mode, "curvature"))
4571 CL->evaluate_residue_pair=evaluate_curvature_score;
4572 CL->get_dp_cost=slow_get_dp_cost;
4577 CL->evaluate_residue_pair=evaluate_matrix_score;
4578 CL->get_dp_cost=cw_profile_get_dp_cost;
4583 fprintf ( stderr, "\nERROR: %s is an unknown extend_mode[FATAL:%s]\n", extend_mode, PROGRAM);
4584 myexit (EXIT_FAILURE);
4588 int ** combine_two_matrices ( int **mat1, int **mat2)
4590 int naa, re1, re2, Re1, Re2, a, b, u, l;
4592 naa=(int)strlen (BLAST_AA_ALPHABET);
4593 for ( a=0; a< naa; a++)
4594 for ( b=0; b< naa; b++)
4596 re1=BLAST_AA_ALPHABET[a];
4597 re2=BLAST_AA_ALPHABET[b];
4598 if (re1=='*' || re2=='*');
4602 Re1=toupper(re1);Re2=toupper(re2);
4603 re1-='A';re2-='A';Re1-='A';Re2-='A';
4607 mat1[re1][re2]=mat2[re1][re2]=l;
4608 mat2[Re1][Re2]=mat2[Re1][Re2]=u;
4614 /* Off the shelves evaluations */
4615 /*********************************************************************************************/
4617 /* OFF THE SHELVES EVALUATION */
4619 /*********************************************************************************************/
4622 int lat_sum_pair (Alignment *A, char *mat)
4624 int a,b,c, tot=0, v1, v2, score;
4627 matrix=read_matrice (mat);
4629 for (a=0; a<A->nseq; a++)
4630 for ( b=0; b<A->nseq; b++)
4632 for (c=1; c<A->len_aln; c++)
4636 r11=A->seq_al[a][c-1];
4637 r12=A->seq_al[a][c];
4638 if (is_gap(r11) || is_gap(r12))continue;
4639 else v1=matrix[r11-'A'][r12-'A'];
4641 r11=A->seq_al[b][c-1];
4642 r12=A->seq_al[b][c];
4643 if (is_gap(r11) || is_gap(r12))continue;
4644 else v2=matrix[r11-'A'][r12-'A'];
4646 score+=(v1-v2)*(v1-v2);
4650 score=(100*score)/tot;
4651 return (float)score;
4656 /* Off the shelves evaluations */
4657 /*********************************************************************************************/
4659 /* OFF THE SHELVES EVALUATION */
4661 /*********************************************************************************************/
4663 int comp_pair ( int len,char *sa, char *sb, int seqA, int seqB,int *tgp_a, int *tgp_b,int gap_op,int gap_ex, int start, int end,int **matrix,int MODE);
4664 int score_gap ( int len, char *sa, char *sb,int seqA, int seqB,int *tgp_a, int *tgp_b, int op, int ex, int start, int end, int MODE);
4665 void evaluate_tgp_decoded_chromosome ( Alignment *A,int **TGP,int start, int end,int MODE);
4666 int gap_type ( char a, char b);
4670 float sum_pair ( Alignment*A,char *mat_name, int gap_op, int gap_ex)
4681 matrix=read_matrice (mat_name);
4682 matrix=mat2inverted_mat (matrix, "acdefghiklmnpqrstvwy");
4688 tgp= declare_int (A->nseq,2);
4690 evaluate_tgp_decoded_chromosome ( A,tgp,start, end,MODE);
4692 for ( a=0; a< A->nseq-1; a++)
4693 for (b=a+1; b<A->nseq; b++)
4695 pscore= comp_pair (A->len_aln,A->seq_al[a], A->seq_al[b],a, b,tgp[a], tgp[b],gap_op,gap_ex, start, end,matrix, MODE);
4697 /*score+=(double)pscore*(int)(PARAM->OFP)->weight[A->order[a][0]][A->order[b][0]];*//*NO WEIGHTS*/
4700 score=score/(A->nseq*A->nseq);
4702 return (float)score;
4705 int comp_pair ( int len,char *sa, char *sb, int seqA, int seqB,int *tgp_a, int *tgp_b,int gap_op,int gap_ex, int start, int end,int **matrix,int MODE)
4712 score+= score_gap (len, sa,sb, seqA, seqB,tgp_a, tgp_b, gap_op,gap_ex, start, end,MODE);
4717 for (a=start; a<=end; a++)
4719 if ( is_gap(sa[a]) || is_gap(sb[a]))
4721 if (is_gap(sa[a]) && is_gap(sb[a]));
4730 score += matrix [sa[a]-'A'][sb[a]-'A'];
4736 int score_gap ( int len, char *sa, char *sb,int seqA, int seqB,int *tgp_a, int *tgp_b, int op, int ex, int start, int end, int MODE)
4743 int right_gap, left_gap;
4753 int sequence_pattern[2][3];
4757 /*op= gor_gap_op ( 0,seqA, seqB, PARAM);
4758 ex= gor_gap_ext ( 0, seqA, seqB, PARAM);*/
4762 for (a=start; a<=end; ++a)
4765 type= gap_type ( sa[a], sb[a]);
4767 if ( type==2 && ga<=gb)
4772 else if (type==1 && ga >=gb)
4794 else if ( (type==flag1) && flag2==1)
4799 else if ( (type!=flag1) && flag2==1)
4808 /*gap_type -/-:0, X/X:-1 X/-:1, -/X:2*/
4809 /*evaluate the pattern of gaps*/
4812 sequence_pattern[0][0]=sequence_pattern[1][0]=0;
4813 for ( a=start; a<=end && continue_loop==1; a++)
4815 left_gap= gap_type ( sa[a], sb[a]);
4820 sequence_pattern[0][0]=sequence_pattern[1][0]=0;
4826 for (b=a; b<=end && continue_loop==1; b++)
4827 {type=gap_type( sa[b], sb[b]);
4830 if ( type!=left_gap && type !=0)
4833 sequence_pattern[2-left_gap][0]= b-a-null_gap;
4834 sequence_pattern [1-(2-left_gap)][0]=0;
4837 if ( continue_loop==1)
4840 sequence_pattern[2-left_gap][0]= b-a-null_gap;
4841 sequence_pattern [1-(2-left_gap)][0]=0;
4847 sequence_pattern[0][2]=sequence_pattern[1][2]=1;
4848 for ( a=start; a<=end; a++)
4850 if ( !is_gap(sa[a]))
4851 sequence_pattern[0][2]=0;
4852 if ( !is_gap(sb[a]))
4853 sequence_pattern[1][2]=0;
4857 sequence_pattern[0][1]=sequence_pattern[1][1]=0;
4858 for ( a=end; a>=start && continue_loop==1; a--)
4860 right_gap= gap_type ( sa[a], sb[a]);
4865 sequence_pattern[0][1]=sequence_pattern[1][1]=0;
4871 for (b=a; b>=start && continue_loop==1; b--)
4872 {type=gap_type( sa[b], sb[b]);
4875 if ( type!=right_gap && type !=0)
4878 sequence_pattern[2-right_gap][1]= a-b-null_gap;
4879 sequence_pattern [1-(2-right_gap)][1]=0;
4882 if ( continue_loop==1)
4885 sequence_pattern[2-right_gap][1]= a-b-null_gap;
4886 sequence_pattern [1-(2-right_gap)][1]=0;
4893 printf ( "\n*****************************************************");
4894 printf ( "\n%c\n%c", sa[start],sb[start]);
4895 printf ( "\n%d %d %d",sequence_pattern[0][0] ,sequence_pattern[0][1], sequence_pattern[0][2]);
4896 printf ( "\n%d %d %d",sequence_pattern[1][0] ,sequence_pattern[1][1], sequence_pattern[1][2]);
4897 printf ( "\n*****************************************************");
4900 /*correct the scoring*/
4905 if ( FABS(tgp_a[0])>1 && (FABS(tgp_a[0])>FABS( tgp_b[0])))
4906 score-= (sequence_pattern[0][0]>0)?op:0;
4907 if ( FABS(tgp_b[0])>1 && (FABS(tgp_b[0])> FABS(tgp_a[0])))
4908 score-= (sequence_pattern[1][0]>0)?op:0;
4910 else if ( MODE ==1 || MODE ==2)
4912 if ( FABS(tgp_a[0])>1 && (FABS(tgp_a[0])>FABS( tgp_b[0])) && (tgp_a[1]!=1 || sequence_pattern[0][2]==0))
4913 score-= (sequence_pattern[0][0]>0)?op:0;
4914 if ( FABS(tgp_b[0])>1 && (FABS(tgp_b[0])> FABS(tgp_a[0])) && (tgp_b[1]!=1 || sequence_pattern[1][2]==0))
4915 score-= (sequence_pattern[1][0]>0)?op:0;
4918 if ( tgp_a[0]>=1 && tgp_a[0]==tgp_b[0])
4919 score -=(sequence_pattern[0][0]>0)?op:0;
4920 if ( tgp_b[0]>=1 && tgp_a[0]==tgp_b[0])
4921 score-= (sequence_pattern[1][0]>0)?op:0;
4924 if ( tgp_a[1]==1 && sequence_pattern[0][2]==0)
4925 score -= ( sequence_pattern[0][1]>0)?op:0;
4926 else if ( tgp_a[1]==1 && sequence_pattern[0][2]==1 && tgp_a[0]<=0)
4927 score -= ( sequence_pattern[0][1]>0)?op:0;
4930 if ( tgp_b[1]==1 && sequence_pattern[1][2]==0)
4931 score -= ( sequence_pattern[1][1]>0)?op:0;
4932 else if ( tgp_b[1]==1 && sequence_pattern[1][2]==1 && tgp_b[0]<=0)
4933 score -= ( sequence_pattern[1][1]>0)?op:0;
4938 score -=sequence_pattern[0][0]*ex;
4940 score -= sequence_pattern[1][0]*ex;
4942 score-=sequence_pattern[0][1]*ex;
4944 score-=sequence_pattern[1][1]*ex;
4954 void evaluate_tgp_decoded_chromosome ( Alignment *A,int **TGP,int start, int end,int MODE)
4961 if (MODE==11 || MODE==13|| MODE==14)
4963 if ( start==0)for ( a=0; a<A->nseq; a++)TGP[a][0]=-1;
4964 else for ( a=0; a<A->nseq; a++)TGP[a][0]=(is_gap(A->seq_al[a][start-1])==1)?0:1;
4966 if ( end==A->len_aln-1)for ( a=0; a<A->nseq; a++)TGP[a][1]=-1;
4967 else for ( a=0; a<A->nseq; a++)TGP[a][1]=(is_gap(A->seq_al[a][start-1])==1)?0:1;
4971 /* 0: in the middle of the alignement
4973 2: q left gap is the continuation of another gap that was open outside the bloc ( don't open it)
4976 for ( a=0; a< A->nseq; a++)
4980 for ( b=0; b< start; b++)
4981 if ( !is_gap(A->seq_al[a][b]))
4985 if (is_gap(A->seq_al[a][start-1]) && TGP[a][0]!=1)
4988 for ( b=(start-1); b>=0 && continue_loop==1; b--)
4989 {TGP[a][0]-= ( is_gap(A->seq_al[a][b])==1)?1:0;
4990 continue_loop= (is_gap(A->seq_al[a][b])==1)?continue_loop:0;
4994 else if (is_gap(A->seq_al[a][start-1]) && TGP[a][0]==1)
4998 for ( b=(start-1); b>=0 && continue_loop==1; b--)
4999 {TGP[a][0]+= ( is_gap(A->seq_al[a][b])==1)?1:0;
5000 continue_loop= (is_gap(A->seq_al[a][b])==1)?continue_loop:0;
5003 for ( b=(A->len_aln-1); b>end; b--)
5004 if ( !is_gap(A->seq_al[a][b]))
5009 int gap_type ( char a, char b)
5011 /*gap_type -/-:0, X/X:-1 X/-:1, -/STAR:2*/
5013 if ( is_gap(a) && is_gap(b))
5015 else if ( !is_gap(a) && !is_gap(b))
5017 else if ( !is_gap(a))
5019 else if ( !is_gap(b))
5025 /*********************************COPYRIGHT NOTICE**********************************/
5026 /*© Centro de Regulacio Genomica */
5028 /*Cedric Notredame */
5029 /*Tue Oct 27 10:12:26 WEST 2009. */
5030 /*All rights reserved.*/
5031 /*This file is part of T-COFFEE.*/
5033 /* T-COFFEE is free software; you can redistribute it and/or modify*/
5034 /* it under the terms of the GNU General Public License as published by*/
5035 /* the Free Software Foundation; either version 2 of the License, or*/
5036 /* (at your option) any later version.*/
5038 /* T-COFFEE is distributed in the hope that it will be useful,*/
5039 /* but WITHOUT ANY WARRANTY; without even the implied warranty of*/
5040 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/
5041 /* GNU General Public License for more details.*/
5043 /* You should have received a copy of the GNU General Public License*/
5044 /* along with Foobar; if not, write to the Free Software*/
5045 /* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/
5046 /*............................................... |*/
5047 /* If you need some more information*/
5048 /* cedric.notredame@europe.com*/
5049 /*............................................... |*/
5053 /*********************************COPYRIGHT NOTICE**********************************/