+++ /dev/null
-#include <stdio.h>
-#include <stdlib.h>
-#include <math.h>
-#include <stdarg.h>
-#include <string.h>
-#include <ctype.h>
-#include "io_lib_header.h"
-#include "util_lib_header.h"
-#include "define_header.h"
-
-#include "dp_lib_header.h"
-float compute_lambda (int **matrix,char *alphabet);
-/*********************************************************************************************/
-/* */
-/* FUNCTIONS FOR EVALUATING THE CONSISTENCY BETWEEN ALN AND CL */
-/* */
-/*********************************************************************************************/
-
-/*Fast: score= extended_res/max_extended_residue_for the whole aln
- slow: score= extended_res/sum all extended score for that residue
- non_extended score= non_ext /sum all non extended score for that residue
- heuristic score= extended /sum of extended score of all pairs in the library
- (i.e. Not ALL the possible pairs)
-*/
-Alignment * main_coffee_evaluate_output2 ( Alignment *IN,Constraint_list *CL, const char *mode );
-int sub_aln2ecl_raw_score (Alignment *A, Constraint_list *CL, int ns, int *ls)
-{
- int **pos;
- int p1,r1, r2, s1, s2;
- int score=0;
-
- if ( !A) return 0;
- A=reorder_aln (A, (CL->S)->name,(CL->S)->nseq);
- pos=aln2pos_simple ( A,A->nseq);
-
-
-
- for (p1=0; p1<A->len_aln; p1++)
- {
- for (s1=0; s1<ns-1; s1++)
- {
- for (s2=s1+1; s2<ns; s2++)
- {
-
- r1=pos[ls[s1]][p1];
- r2=pos[ls[s2]][p1];
- if (r1>0 && r2>0)
- {
- score+= residue_pair_extended_list_pc (CL,ls[s1], r1, ls[s2], r2)*SCORE_K;
- }
- }
- }
- }
- free_int (pos, -1);
- return score;
- return (score/(((ns*ns)-ns)/2))/A->len_aln;
-}
-int aln2ecl_raw_score (Alignment *A, Constraint_list *CL)
-{
- int **pos;
- int p1,r1, r2, s1, s2;
- int score=0;
- if ( !A) return 0;
- A=reorder_aln (A, (CL->S)->name,(CL->S)->nseq);
- pos=aln2pos_simple ( A,A->nseq);
-
-
-
- for (p1=0; p1<A->len_aln; p1++)
- {
- for (s1=0; s1<A->nseq-1; s1++)
- {
- for (s2=s1+1; s2<A->nseq; s2++)
- {
-
- r1=pos[s1][p1];
- r2=pos[s2][p1];
- if (r1>0 && r2>0)score+= residue_pair_extended_list_pc (CL,s1, r1, s2, r2);
- }
- }
- }
- free_int (pos, -1);
- return score;
- return (score/(((A->nseq*A->nseq)-A->nseq)/2))/A->len_aln;
-}
-int node2sub_aln_score (Alignment *A,Constraint_list *CL, char *mode, NT_node T)
-{
- if ( !T || !T->right ||!T->left)return 0;
- else
- {
- int *ns;
- int **ls;
- ns=vcalloc (2, sizeof (int));
- ls=vcalloc (2, sizeof (int*));
- ns[0]= (T->left)->nseq;
- ns[1]=(T->right)->nseq;
- ls[0]= (T->left)->lseq;
- ls[1]=(T->right)->lseq;
-
- return sub_aln2sub_aln_score (A, CL, mode, ns, ls);
- }
- return -1;
-}
-int sub_aln2sub_aln_score ( Alignment *A,Constraint_list *CL, const char *mode, int *ns, int **ls)
-{
- /*Warning: Does Not Take Gaps into account*/
-
- int **pos;
- int a;
- float score=0;
-
- /*Make sure evaluation functions update their cache if needed*/
- A=update_aln_random_tag (A);
- pos=aln2pos_simple ( A, -1, ns, ls);
- for (a=0; a< A->len_aln; a++)
- score+=CL->get_dp_cost (A, pos, ns[0], ls[0], a, pos, ns[1],ls[1], a, CL);
- free_int (pos, -1);
-
- score=(int)(((float)score)/(A->len_aln*SCORE_K));
- score=(int)(CL->residue_index && CL->normalise)?((score*MAXID)/(CL->normalise)):(score);
- return (int)score;
-}
-int sub_aln2sub_aln_raw_score ( Alignment *A,Constraint_list *CL, const char *mode, int *ns, int **ls)
-{
- /*Warning: Does Not Take Gaps into account*/
-
- int **pos;
- int a;
- float score=0;
-
- /*Make sure evaluation functions update their cache if needed*/
- A=update_aln_random_tag (A);
- pos=aln2pos_simple ( A, -1, ns, ls);
- for (a=0; a< A->len_aln; a++)
- score+=CL->get_dp_cost (A, pos, ns[0], ls[0], a, pos, ns[1],ls[1], a, CL);
- free_int (pos, -1);
- return (int) score;
-}
-
-Alignment* main_coffee_evaluate_output_sub_aln ( Alignment *A,Constraint_list *CL, const char *mode, int *n_s, int **l_s)
-{
- Alignment *SUB1, *SUB2, *SUB3;
- int a, b, c,*list_seq;
-
-
- if (strm ( CL->evaluate_mode, "no"))return NULL;
- else
- {
- list_seq=vcalloc (n_s[0]+n_s[1], sizeof (int));
- for (b=0, a=0; a< 2; a++){for (c=0;c< n_s[a]; c++)list_seq[b++]=l_s[a][c];}
-
-
- SUB1=copy_aln (A, NULL);
- SUB2=extract_sub_aln (SUB1,n_s[0]+n_s[1],list_seq);
- SUB3=main_coffee_evaluate_output (SUB2,CL,CL->evaluate_mode);
- free_aln (SUB1);
- free_aln (SUB2);
- vfree (list_seq);
-
- return SUB3;
- }
-}
-Alignment * overlay_alignment_evaluation ( Alignment *I, Alignment *O)
-{
- int a, b, c, r, i;
- int *buf;
-
- if ( !I || !O) return O;
- if ( I->len_aln!=O->len_aln)printf_exit (EXIT_FAILURE, stderr, "ERROR: Incompatible alignments in overlay_alignment_evaluation");
-
- buf=vcalloc ( MAX(I->len_aln, O->len_aln), sizeof (int));
-
- for (a=0; a<O->nseq; a++)
- {
- if (!strm (I->name[a], O->name[a]))printf_exit (EXIT_FAILURE, stderr, "ERROR: Incompatible alignments in overlay_alignment_evaluation");
- for (b=0; b<O->len_aln; b++)
- {
- r=I->seq_al[a][b];
- if ( islower(r))O->seq_al[a][b]=0;
- else if (r<=9 || (r>='0' && r<='9'))O->seq_al[a][b]=I->seq_al[a][b];
- }
- }
- return O;
-}
-
-Alignment * main_coffee_evaluate_output ( Alignment *IN,Constraint_list *CL, const char *mode )
-{
-
- Alignment *TopS=NULL, *LastS=NULL, *CurrentS=NULL;
-
-
- if ( IN->A)IN=IN->A;
- while (IN)
- {
-
- CurrentS= main_coffee_evaluate_output2(IN, CL, mode);
- if (!TopS)LastS=TopS=CurrentS;
- else
- {
- LastS->A=CurrentS;
- LastS=CurrentS;
- }
- IN=IN->A;
- }
- return TopS;
-}
-
-Alignment * main_coffee_evaluate_output2 ( Alignment *IN,Constraint_list *CL, const char *mode )
-{
-
- /*Make sure evaluation functions update their cache if needed*/
- IN=update_aln_random_tag (IN);
-
- if ( CL->evaluate_residue_pair==evaluate_matrix_score || CL->ne==0 ||strm ( mode , "categories") || strm ( mode , "matrix")|| strm(mode, "sar")|| strstr (mode, "boxshade") )
- {
-
- if ( strm ( mode , "categories")) return categories_evaluate_output (IN, CL);
- else if ( strm ( mode , "matrix"))return matrix_evaluate_output (IN, CL);
- else if ( strm ( mode, "sar"))return sar_evaluate_output (IN, CL);
- else if ( strstr ( mode, "boxshade"))return boxshade_evaluate_output (IN, CL, atoi (strstr(mode, "_")+1));
-
- else if ( CL->evaluate_residue_pair==evaluate_matrix_score) return matrix_evaluate_output (IN, CL);
- else if ( CL->ne==0) return matrix_evaluate_output (IN, CL);
- }
- else if ( strm (mode, "no"))return NULL;
- else if ( strstr( mode, "triplet"))
- {
- return triplet_coffee_evaluate_output ( IN,CL);
- }
- else if ( strstr ( mode, "fast"))
- {
- return fast_coffee_evaluate_output ( IN,CL);
- }
- else if ( strstr ( mode, "slow"))
- {
- return slow_coffee_evaluate_output ( IN,CL);
- }
-
- else if ( strstr ( mode, "non_extended"))
- {
- return non_extended_t_coffee_evaluate_output ( IN,CL);
- }
-
- else if ( strm (mode, "sequences"))
- {
- return coffee_seq_evaluate_output ( IN,CL);
- }
- else
- {
- fprintf ( stderr, "\nUNKNOWN MODE FOR ALIGNMENT EVALUATION: *%s* [FATAL:%s]",mode, PROGRAM);
- crash ("");
- return NULL;
- }
- return IN;
-}
-
-
-
-Alignment * coffee_evaluate_output ( Alignment *IN,Constraint_list *CL)
- {
- fprintf ( stderr, "\n[WARNING:%s]THE FUNCTION coffee_evaluate_output IS NOT ANYMORE SUPPORTED\n", PROGRAM);
- fprintf ( stderr, "\n[WARNING]fast_coffee_evaluate_output WILL BE USED INSTEAD\n");
-
- return fast_coffee_evaluate_output (IN,CL);
- }
-Alignment * matrix_evaluate_output ( Alignment *IN,Constraint_list *CL)
- {
- int a,b, c,r, s, r1, r2;
- Alignment *OUT=NULL;
-
- double **tot_res;
- double **max_res;
-
- double **tot_seq;
- double **max_seq;
-
- double **tot_col;
- double **max_col;
-
- double max_aln=0;
- double tot_aln=0;
-
-
- /*
- Residue x: sum of observed extended X.. /sum of possible X..
- */
-
-
- if ( !CL->M)CL->M=read_matrice ("blosum62mt");
-
- OUT=copy_aln (IN, OUT);
-
-
- tot_res=declare_double ( IN->nseq, IN->len_aln);
- max_res=declare_double ( IN->nseq, IN->len_aln);
-
- tot_seq=declare_double ( IN->nseq, 1);
- max_seq=declare_double ( IN->nseq, 1);
- tot_col=declare_double ( IN->len_aln,1);
- max_col=declare_double ( IN->len_aln,1);
-
- max_aln=tot_aln=0;
-
- for (a=0; a< IN->len_aln; a++)
- {
- for ( b=0; b< IN->nseq; b++)
- {
- r1=tolower(IN->seq_al[b][a]);
- if ( is_gap(r1))continue;
- r= CL->M[r1-'A'][r1-'A'];
- r= 1;
- for ( c=0; c<IN->nseq; c++)
- {
- r2=tolower(IN->seq_al[c][a]);
- if (b==c || is_gap (r2))continue;
-
- s=CL->M[r2-'A'][r1-'A'];
- s=(s<=0)?0:1;
-
- tot_res[b][a]+=s;
- max_res[b][a]+=r;
-
- tot_col[a][0]+=s;
- max_col[a][0]+=r;
-
- tot_seq[b][0]+=s;
- max_seq[b][0]+=r;
-
- tot_aln+=s;
- max_aln+=r;
- }
- }
- }
-
-
- for ( a=0; a< IN->nseq; a++)
- {
- if ( !max_seq[a][0])continue;
-
- OUT->score_seq[a]=(tot_seq[a][0]*100)/max_seq[a][0];
- for (b=0; b< IN->len_aln; b++)
- {
- r1=IN->seq_al[a][b];
- if ( is_gap(r1) || !max_res[a][b])continue;
-
- r1=(tot_res[a][b]*10)/max_res[a][b];
- r1=(r1>=10)?9:r1;
- r1=r1<0?0:r1;
- (OUT)->seq_al[a][b]=r1+'0';
- }
- }
-
- for ( a=0; a< IN->len_aln; a++)
- {
- r1=(max_col[a][0]==0)?0:((tot_col[a][0]*10)/max_col[a][0]);
- r1=(r1>=10)?9:r1;
- (OUT)->seq_al[OUT->nseq][a]=r1+'0';
- }
- sprintf ( OUT->name[IN->nseq], "cons");
- if (max_aln)OUT->score_seq[OUT->nseq]=OUT->score_aln=(100*tot_aln)/max_aln;
-
-
- free_double (tot_res,-1);
- free_double (max_res,-1);
-
- free_double (tot_seq,-1);
- free_double (max_seq,-1);
-
- return OUT;
- }
-
-Alignment * sar_evaluate_output ( Alignment *IN,Constraint_list *CL)
- {
- int a,b, c,r, s, r1, r2;
- Alignment *OUT=NULL;
-
- double **tot_res;
- double **max_res;
-
- double **tot_seq;
- double **max_seq;
-
- double **tot_col;
- double **max_col;
-
- double max_aln=0;
- double tot_aln=0;
-
-
- /*
- Residue x: sum of observed extended X.. /sum of possible X..
- */
-
-
- if ( !CL->M)CL->M=read_matrice ("blosum62mt");
-
- OUT=copy_aln (IN, OUT);
-
-
- tot_res=declare_double ( IN->nseq, IN->len_aln);
- max_res=declare_double ( IN->nseq, IN->len_aln);
-
- tot_seq=declare_double ( IN->nseq, 1);
- max_seq=declare_double ( IN->nseq, 1);
- tot_col=declare_double ( IN->len_aln,1);
- max_col=declare_double ( IN->len_aln,1);
-
- max_aln=tot_aln=0;
-
- for (a=0; a< IN->len_aln; a++)
- {
- for (b=0; b< IN->nseq; b++)
- {
- r1=tolower(IN->seq_al[b][a]);
- for (c=0; c<IN->nseq; c++)
- {
- r2=tolower(IN->seq_al[c][a]);
- if (b==c)continue;
-
- if ( is_gap(r1) && is_gap(r2))s=0;
- else s=(r1==r2)?1:0;
-
- r=1;
-
-
- tot_res[b][a]+=s;
- max_res[b][a]+=r;
-
- tot_col[a][0]+=s;
- max_col[a][0]+=r;
-
- tot_seq[b][0]+=s;
- max_seq[b][0]+=r;
-
- tot_aln+=s;
- max_aln+=r;
- }
- }
- }
-
- for ( a=0; a< IN->nseq; a++)
- {
- if ( !max_seq[a][0])continue;
- OUT->score_seq[a]=(max_seq[a][0]*100)/max_seq[a][0];
- for (b=0; b< IN->len_aln; b++)
- {
- r1=IN->seq_al[a][b];
- if ( is_gap(r1) || !max_res[a][b])continue;
- r1=(tot_res[a][b]*10)/max_res[a][b];
- r1=(r1>=10)?9:r1;
- r1=r1<0?0:r1;
- (OUT)->seq_al[a][b]=r1+'0';
- }
- }
-
- for ( a=0; a< IN->len_aln; a++)
- {
- r1=(max_col[a][0]==0)?0:((tot_col[a][0]*10)/max_col[a][0]);
- r1=(r1>=10)?9:r1;
- (OUT)->seq_al[OUT->nseq][a]=r1+'0';
- }
- sprintf ( OUT->name[IN->nseq], "cons");
- if (max_aln)OUT->score_aln=(100*tot_aln)/max_aln;
-
-
- free_double (tot_res,-1);
- free_double (max_res,-1);
-
- free_double (tot_seq,-1);
- free_double (max_seq,-1);
-
- return OUT;
- }
-Alignment * boxshade_evaluate_output ( Alignment *IN,Constraint_list *CL, int T)
- {
- Alignment *OUT=NULL;
- int **aa;
- int r,br, bs, a, b;
- float f;
-
-
- /*
- Residue x: sum of observed extended X.. /sum of possible X..
- */
-
-
- OUT=copy_aln (IN, OUT);
- aa=declare_int (26, 2);
-
- for ( a=0; a< OUT->len_aln; a++)
- {
- for ( b=0; b< 26; b++){aa[b][1]=0;aa[b][0]='a'+b;}
- for ( b=0; b< OUT->nseq; b++)
- {
- r=tolower(OUT->seq_al[b][a]);
- if ( !is_gap(r))aa[r-'a'][1]++;
- }
- sort_int ( aa, 2, 1, 0,25);
- f=(aa[25][1]*100)/OUT->nseq;
-
- if (f<T);
- else
- {
- bs=((f-T)*10)/(100-T);
- br=aa[25][0];
-
- if (bs==10)bs--;bs+='0';
- for ( b=0; b< OUT->nseq; b++)
- {
- r=tolower(OUT->seq_al[b][a]);
- if (r==br && bs>'1')OUT->seq_al[b][a]=bs;
- }
- OUT->seq_al[b][a]=bs;
- }
- }
- sprintf ( OUT->name[IN->nseq], "cons");
-
- return OUT;
- }
-
-Alignment * categories_evaluate_output ( Alignment *IN,Constraint_list *CL)
- {
-
- Alignment *OUT=NULL;
- int a, b, r;
- int *aa;
- float score, nseq2, tot_aln;
- float n;
- /*
- Residue x: sum of observed extended X.. /sum of possible X..
- */
- OUT=copy_aln (IN, OUT);
- aa=vcalloc ( 26, sizeof (int));
- nseq2=IN->nseq*IN->nseq;
-
- for (tot_aln=0, a=0; a< IN->len_aln; a++)
- {
- for (n=0,b=0; b< IN->nseq; b++)
- {
- r=IN->seq_al[b][a];
-
- if ( is_gap(r))n++;
- else
- {
- aa[tolower(r)-'a']++;
- n++;
- }
- }
- n=n*n;
- for ( score=0,b=0; b< 26; b++){score+=aa[b]*aa[b];aa[b]=0;}
- /*score/=nseq2;*/
- score=(n==0)?0:score/n;
- tot_aln+=score;
- r=score*10;
- r=(r>=10)?9:r;
- (OUT)->seq_al[OUT->nseq][a]='0'+r;
- }
- OUT->score_aln=(tot_aln/OUT->len_aln)*100;
- sprintf ( OUT->name[IN->nseq], "cons");
- vfree(aa);
- return OUT;
- }
-
-Alignment * categories_evaluate_output_old ( Alignment *IN,Constraint_list *CL)
- {
-
- Alignment *OUT=NULL;
- int nc,a, b, r;
- int *aa, ng;
- float score, nseq2, tot_aln, min=0;
-
- /*
- Residue x: sum of observed extended X.. /sum of possible X..
- */
- OUT=copy_aln (IN, OUT);
- aa=vcalloc ( 26, sizeof (int));
- nseq2=IN->nseq*IN->nseq;
-
- for (tot_aln=0, a=0; a< IN->len_aln; a++)
- {
- for (ng=0,b=0; b< IN->nseq; b++)
- {
- r=IN->seq_al[b][a];
-
- if ( is_gap(r))ng++;
- else
- {
- aa[tolower(r)-'a']++;
- }
- }
- for (nc=0, b=0; b<26; b++)
- {
- if ( aa[b])nc++;
- aa[b]=0;
- }
- if (nc>9)score=0;
- else score=9-nc;
-
- score=(2*min)/IN->nseq;
-
- tot_aln+=score;
- r=score*10;
- r=(r>=10)?9:r;
- (OUT)->seq_al[OUT->nseq][a]='0'+r;
- }
-
- OUT->score_aln=(tot_aln/OUT->len_aln)*100;
- sprintf ( OUT->name[IN->nseq], "cons");
- vfree(aa);
- return OUT;
- }
-Alignment * fork_triplet_coffee_evaluate_output ( Alignment *IN,Constraint_list *CL);
-Alignment * nfork_triplet_coffee_evaluate_output ( Alignment *IN,Constraint_list *CL);
-Alignment * triplet_coffee_evaluate_output ( Alignment *IN,Constraint_list *CL)
-{
-
- if (!IN || !CL || !CL->residue_index) return IN;
-
-
- if ( get_nproc()==1)return nfork_triplet_coffee_evaluate_output (IN,CL);
- else if (strstr ( CL->multi_thread, "evaluate"))return fork_triplet_coffee_evaluate_output (IN,CL);
- else return nfork_triplet_coffee_evaluate_output (IN,CL);
-}
-Alignment * fork_triplet_coffee_evaluate_output ( Alignment *IN,Constraint_list *CL)
- {
- Alignment *OUT=NULL;
- int **pos;
-
- double score_col=0, score_aln=0, score_res=0, score=0;
- double max_col=0, max_aln=0, max_res=0;
- double *max_seq, *score_seq;
-
- int a,b, x, y,res;
- int s1,r1,s2,r2,w2,s3,r3,w3;
- int **lu;
-
- //multi-threading
- FILE *fp;
- char **pid_tmpfile;
- int sjobs, njobs;
- int **sl;
- int j;
-
- OUT=copy_aln (IN, OUT);
- pos=aln2pos_simple(IN, IN->nseq);
- sprintf ( OUT->name[IN->nseq], "cons");
-
- max_seq=vcalloc ( IN->nseq, sizeof (double));
- score_seq=vcalloc ( IN->nseq, sizeof (double));
- lu=declare_int (IN->nseq, IN->len_aln+1);
-
- //multi Threading stuff
- njobs=get_nproc();
- sl=n2splits (njobs,IN->len_aln);
- pid_tmpfile=vcalloc (njobs, sizeof (char*));
-
- for (sjobs=0,j=0; sjobs<njobs; j++)
- {
- pid_tmpfile[j]=vtmpnam(NULL);
- if (vvfork (NULL)==0)
- {
- initiate_vtmpnam(NULL);
- fp=vfopen (pid_tmpfile[j], "w");
- score_aln=max_aln=0;
- for (a=sl[j][0]; a<sl[j][1]; a++)
- {
- if (j==0)output_completion (stderr,a,sl[0][1],1, "Final Evaluation");
- score_col=max_col=0;
- for (b=0; b<IN->nseq; b++)
- {
- s1=IN->order[b][0];
- r1=pos[b][a];
- if (r1>=0)lu[s1][r1]=1;
- }
- for (b=0; b<IN->nseq; b++)
- {
- score_res=max_res=NORM_F;
- res=NO_COLOR_RESIDUE;
- s1=IN->order[b][0];
- r1=pos[b][a];
- if (r1<=0)
- {
- fprintf (fp, "%d ", res);
- continue;
- }
-
- for (x=1;x<CL->residue_index[s1][r1][0];x+=ICHUNK)
- {
- s2=CL->residue_index[s1][r1][x+SEQ2];
- r2=CL->residue_index[s1][r1][x+R2];
- w2=CL->residue_index[s1][r1][x+WE];
- for (y=1; y<CL->residue_index[s2][r2][0];y+=ICHUNK)
- {
- s3=CL->residue_index[s2][r2][y+SEQ2];
- r3=CL->residue_index[s2][r2][y+R2];
- w3=CL->residue_index[s2][r2][y+WE];
-
- score=MIN(w2,w3);
-
- max_res+=score;
- max_col+=score;
- max_seq[b]+=score;
- max_aln+=score;
-
- if (lu[s3][r3])
- {
- score_res+=score;
- score_col+=score;
- score_seq[b]+=score;
- score_aln+=score;
- }
- }
- }
- res=(max_res==0)?NO_COLOR_RESIDUE:((score_res*10)/max_res);
- res=(res==NO_COLOR_RESIDUE)?res:(MIN(res,9));
- fprintf ( fp, "%d ", res);
- }
- for (b=0; b<IN->nseq; b++)
- {
- s1=IN->order[b][0];
- r1=pos[b][a];
- if (r1>0)lu[s1][r1]=0;
- }
-
- res=(max_col==0)?NO_COLOR_RESIDUE:((score_col*10)/max_col);
- res=(res==NO_COLOR_RESIDUE)?res:(MIN(res,9));
- fprintf (fp, "%d ", res);
- for (b=0; b<IN->nseq; b++)fprintf (fp, "%f %f ", score_seq[b], max_seq[b]);
- }
- fprintf (fp, "%f %f ", score_aln, max_aln);
- vfclose (fp);
- myexit (EXIT_SUCCESS);
- }
- else
- {
- sjobs++;
- }
- }
-
- while (sjobs>=0){vwait(NULL); sjobs--;}//wait for all jobs to complete
-
- for (j=0; j<njobs; j++)
- {
- float sseq, mseq;
- fp=vfopen (pid_tmpfile[j], "r");
- for (a=sl[j][0];a<sl[j][1]; a++)
- {
- for (b=0; b<=IN->nseq; b++)//don't forget the consensus
- {
- fscanf (fp, "%d ", &res);
- OUT->seq_al[b][a]=res;
- }
- for (b=0; b<IN->nseq; b++)
- {
- fscanf (fp, "%f %f", &sseq, &mseq);
- score_seq[b]+=sseq;
- max_seq[b]+=mseq;
- }
- }
- fscanf (fp, "%f %f", &sseq, &mseq);
- score_aln+=sseq;
- max_aln+=mseq;
- vfclose (fp);
- remove (pid_tmpfile[j]);
- }
- fprintf ( stderr, "\n");
-
- IN->score_aln=OUT->score_aln=(max_aln==0)?0:((score_aln*100)/max_aln);
- for ( a=0; a< OUT->nseq; a++)
- {
- OUT->score_seq[a]=(max_seq[a]==0)?0:((score_seq[a]*100)/max_seq[a]);
- }
-
- free_int (lu,-1);
- free_int (pos , -1);
- vfree (pid_tmpfile);
- free_int (sl, -1);
- vfree ( score_seq);
- vfree ( max_seq);
- return OUT;
- }
-
-Alignment * nfork_triplet_coffee_evaluate_output ( Alignment *IN,Constraint_list *CL)
- {
- Alignment *OUT=NULL;
- int **pos;
-
- double score_col=0, score_aln=0, score_res=0, score=0;
- double max_col=0, max_aln=0, max_res=0;
- double *max_seq, *score_seq;
-
- int a,b, x, y,res;
- int s1,r1,s2,r2,w2,s3,r3,w3;
- int **lu;
-
-
-
- OUT=copy_aln (IN, OUT);
- pos=aln2pos_simple(IN, IN->nseq);
- sprintf ( OUT->name[IN->nseq], "cons");
-
- max_seq=vcalloc ( IN->nseq, sizeof (double));
- score_seq=vcalloc ( IN->nseq, sizeof (double));
- lu=declare_int (IN->nseq, IN->len_aln+1);
-
-
-
-
- score_aln=max_aln=0;
- for (a=0; a<IN->len_aln; a++)
- {
- output_completion (stderr,a,IN->len_aln,1, "Final Evaluation");
- score_col=max_col=0;
- for (b=0; b<IN->nseq; b++)
- {
- s1=IN->order[b][0];
- r1=pos[b][a];
- if (r1>=0)lu[s1][r1]=1;
- }
- for (b=0; b<IN->nseq; b++)
- {
- score_res=max_res=NORM_F;
- OUT->seq_al[b][a]=NO_COLOR_RESIDUE;
- s1=IN->order[b][0];
- r1=pos[b][a];
- if (r1<=0)continue;
- for (x=1;x<CL->residue_index[s1][r1][0];x+=ICHUNK)
- {
- s2=CL->residue_index[s1][r1][x+SEQ2];
- r2=CL->residue_index[s1][r1][x+R2];
- w2=CL->residue_index[s1][r1][x+WE];
- for (y=1; y<CL->residue_index[s2][r2][0];y+=ICHUNK)
- {
- s3=CL->residue_index[s2][r2][y+SEQ2];
- r3=CL->residue_index[s2][r2][y+R2];
- w3=CL->residue_index[s2][r2][y+WE];
-
- score=MIN(w2,w3);
-
- max_res+=score;
- max_col+=score;
- max_seq[b]+=score;
- max_aln+=score;
-
- if (lu[s3][r3])
- {
- score_res+=score;
- score_col+=score;
- score_seq[b]+=score;
- score_aln+=score;
- }
- }
- }
- res=(max_res==0)?NO_COLOR_RESIDUE:((score_res*10)/max_res);
- res=(res==NO_COLOR_RESIDUE)?res:(MIN(res,9));
- OUT->seq_al[b][a]=res;
- }
- for (b=0; b<IN->nseq; b++)
- {
- s1=IN->order[b][0];
- r1=pos[b][a];
- if (r1>0)lu[s1][r1]=0;
- }
-
- res=(max_col==0)?NO_COLOR_RESIDUE:((score_col*10)/max_col);
- res=(res==NO_COLOR_RESIDUE)?res:(MIN(res,9));
- OUT->seq_al[IN->nseq][a]=res;
- }
- fprintf ( stderr, "\n");
-
- IN->score_aln=OUT->score_aln=(max_aln==0)?0:((score_aln*100)/max_aln);
- for ( a=0; a< OUT->nseq; a++)
- {
- OUT->score_seq[a]=(max_seq[a]==0)?0:((score_seq[a]*100)/max_seq[a]);
- }
-
- free_int (lu,-1);
- free_int (pos , -1);
- vfree ( score_seq);
- vfree ( max_seq);
- return OUT;
- }
-Alignment * fast_coffee_evaluate_output ( Alignment *IN,Constraint_list *CL)
- {
- int a,b, c, m,res, s, s1, s2, r1, r2;
- Alignment *OUT=NULL;
- int **pos, **pos2;
-
- double score_col=0, score_aln=0, score_res=0;
- double max_col, max_aln;
- double *max_seq, *score_seq;
- int local_m;
- int local_nseq;
-
-
- /*NORMALIZE: with the highest scoring pair found in the multiple alignment*/
-
-
- if ( !CL->evaluate_residue_pair){fprintf ( stderr, "\nWARNING: CL->evaluate_residue_pair Not set\nSet to: extend_residue_pair\n");CL->evaluate_residue_pair= extend_residue_pair; }
-
- OUT=copy_aln (IN, OUT);
- pos=aln2pos_simple(IN, IN->nseq);
- pos2=aln2defined_residues (IN, CL);
-
- max_seq=vcalloc ( IN->nseq, sizeof (double));
- score_seq=vcalloc ( IN->nseq, sizeof (double));
-
-
-
- /*1: Identify the highest scoring pair within the alignment*/
-
- for ( m=0, a=0; a< IN->len_aln; a++)
- {
- for ( b=0; b< IN->nseq; b++)
- {
- s1=IN->order[b][0];
- r1=pos[b][a];
-
-
- for ( c=0; c< IN->nseq; c++)
- {
- s2=IN->order[c][0];
- r2=pos[c][a];
- if ( s1==s2 && !CL->do_self)continue;
-
- if ( s1< s2)s=(CL->evaluate_residue_pair)( CL, s1, r1, s2, r2);
- else s=(CL->evaluate_residue_pair)( CL, s2, r2, s1, r1);
-
- s=(s!=UNDEFINED)?s:0;
- m=MAX(m, s);
- }
- }
- }
-
- local_m=m;
-
- sprintf ( OUT->name[IN->nseq], "cons");
- for ( max_aln=0,score_aln=0,a=0; a< IN->len_aln; a++)
- {
- OUT->seq_al[IN->nseq][a]=NO_COLOR_RESIDUE;
- for ( local_nseq=0,b=0; b<IN->nseq; b++){local_nseq+=(pos[b][a]>0 && pos2[b][a])?1:0;}
- local_m=m*(local_nseq-1);
-
- for ( max_col=0, score_col=0,b=0; b< IN->nseq; b++)
- {
- OUT->seq_al[b][a]=NO_COLOR_RESIDUE;
- s1=IN->order[b][0];
- r1=pos[b][a];
-
- if (r1<=0 || !pos2[b][a])
- {
- continue;
- }
-
- for ( score_res=0,c=0; c< IN->nseq; c++)
- {
- s2=IN->order[c][0];
- r2=pos[c][a];
-
- if ((s1==s2 && !CL->do_self) || r2<=0 || !pos2[c][a]){continue;}
- max_col +=m;
- max_seq[b]+=m;
- max_aln +=m;
-
- if ( s1< s2)s=(CL->evaluate_residue_pair)( CL, s1, r1, s2, r2);
- else s=(CL->evaluate_residue_pair)( CL, s2, r2, s1, r1);
- s=(s!=UNDEFINED)?s:0;
-
- score_res+=s;
- score_col+=s;
- score_seq[b]+=s;
- score_aln+=s;
- }
-
- res=(local_m==0)?NO_COLOR_RESIDUE:((score_res*10)/local_m);
- (OUT)->seq_al[b][a]=(res==NO_COLOR_RESIDUE)?res:(MIN(res,9));
-
-
- }
-
- res=(max_col==0)?NO_COLOR_RESIDUE:((score_col*10)/max_col);
- OUT->seq_al[IN->nseq][a]=(res==NO_COLOR_RESIDUE)?res:(MIN(res,9));
-
- }
-
- IN->score_aln=OUT->score_aln=(max_aln==0)?0:((score_aln*100)/max_aln);
- for ( a=0; a< OUT->nseq; a++)
- {
- OUT->score_seq[a]=(max_seq[a]==0)?0:((score_seq[a]*100)/max_seq[a]);
- }
-
- free_int (pos , -1);
- free_int (pos2, -1);
-
- vfree ( score_seq);
- vfree ( max_seq);
- return OUT;
- }
-
-Alignment * slow_coffee_evaluate_output ( Alignment *IN,Constraint_list *CL)
- {
- int a,b, c,res, s, s1, s2, r1, r2;
- Alignment *OUT=NULL;
- int **pos, **pos2;
- double max_score_r, score_r, max;
- double score_col=0, score_aln=0;
- double max_score_col, max_score_aln;
- double *max_score_seq, *score_seq;
- int ***res_extended_weight;
- int n_res_in_col;
-
-
- /*
- Residue x: sum of observed extended X.. /sum of possible X..
- */
-
-
-
-
- if ( !CL->evaluate_residue_pair){fprintf ( stderr, "\nWARNING: CL->evaluate_residue_pair Not set\nSet to: extend_residue_pair\n");CL->evaluate_residue_pair= extend_residue_pair; }
-
-
- OUT=copy_aln (IN, OUT);
- pos=aln2pos_simple(IN, IN->nseq);
- pos2=aln2defined_residues (IN, CL);
-
- max_score_seq=vcalloc ( IN->nseq, sizeof (double));
- score_seq=vcalloc ( IN->nseq, sizeof (double));
- res_extended_weight=declare_arrayN(3,sizeof(int), (CL->S)->nseq, (CL->S)->max_len+1, 2);
- max=(CL->normalise)?(100*CL->normalise)*SCORE_K:100;
-
- for (a=0; a< IN->len_aln; a++)
- {
- for ( b=0; b< IN->nseq-1; b++)
- {
- s1=IN->order[b][0];
- r1=pos[b][a];
- for ( c=b+1; c< IN->nseq; c++)
- {
- s2=IN->order[c][0];
- r2=pos[c][a];
- if ( s1==s2 && !CL->do_self)continue;
- else if ( r1<=0 || r2<=0) continue;
- else
- {
- s=(CL->evaluate_residue_pair)( CL, s1, r1, s2, r2);
- res_extended_weight[s1][r1][0]+=s*100;
- res_extended_weight[s2][r2][0]+=s*100;
- res_extended_weight[s1][r1][1]+=max;
- res_extended_weight[s2][r2][1]+=max;
- }
- }
- }
- }
-
-
- sprintf ( OUT->name[IN->nseq], "cons");
- for ( max_score_aln=0,score_aln=0,a=0; a< IN->len_aln; a++)
- {
- OUT->seq_al[IN->nseq][a]=NO_COLOR_RESIDUE;
- for ( n_res_in_col=0,b=0; b<IN->nseq; b++){n_res_in_col+=(pos[b][a]>0 && pos2[b][a]>0)?1:0;}
- for ( max_score_col=0, score_col=0,b=0; b< IN->nseq; b++)
- {
- OUT->seq_al[b][a]=NO_COLOR_RESIDUE;
- s1=IN->order[b][0];
- r1=pos[b][a];
- if (r1<=0 || pos2[b][a]<1)continue;
- else
- {
- max_score_r =res_extended_weight[s1][r1][1];
- score_r =res_extended_weight[s1][r1][0];
- if ( max_score_r==0 && n_res_in_col>1)res=0;
- else if ( n_res_in_col==1)res=NO_COLOR_RESIDUE;
- else res=((score_r*10)/max_score_r);
-
-
- (OUT)->seq_al[b][a]=(res==NO_COLOR_RESIDUE)?res:(MIN(res, 9));
- max_score_col+=max_score_r;
- score_col+=score_r;
- max_score_seq[b]+=max_score_r;
- score_seq[b]+=score_r;
- max_score_aln+=max_score_r;
- score_aln+=score_r;
- }
- if ( max_score_col==0 && n_res_in_col>1)res=0;
- else if ( n_res_in_col<2)res=NO_COLOR_RESIDUE;
- else res=((score_col*10)/max_score_col);
-
- OUT->seq_al[IN->nseq][a]=(res==NO_COLOR_RESIDUE)?res:(MIN(res,9));
- }
- }
- IN->score_aln=OUT->score_aln=(max_score_aln==0)?0:((score_aln*100)/max_score_aln);
- for ( a=0; a< OUT->nseq; a++)
- {
- OUT->score_seq[a]=(max_score_seq[a]==0)?0:((score_seq[a]*100)/max_score_seq[a]);
- }
-
-
- vfree ( score_seq);
- vfree ( max_score_seq);
- free_arrayN((void*)res_extended_weight, 3);
-
-
- free_int (pos, -1);
- free_int (pos2, -1);
- return OUT;
- }
-
-double genepred2sd (Sequence *S);
-double genepred2avg (Sequence *S);
-double genepred2zsum2 (Sequence *S);
-int * genepred2orf_len (Sequence *S);
-double genepred2acc (Sequence *S, Sequence *PS);
-double seq_genepred2acc (Sequence *S, Sequence *PS, char *name);
-Alignment *coffee_seq_evaluate_output ( Alignment *IN, Constraint_list *CL)
-{
- int **w, a,b,c,l,i;
- int avg, min_avg, best_cycle;
- char **pred_list,**weight_list;
- FILE *fp;
- Sequence *S, *PS;
- int ncycle=20;
- float nsd, best_score,score, acc;
- int min_ne;
- Alignment *A;
- float *count;
- min_ne=CL->ne/10; //avoid side effects due to very relaxed libraries
- w=NULL;
- pred_list=declare_char (ncycle, 100);
- weight_list=declare_char(ncycle,100);
- S=CL->S;
- fprintf ( stderr, "\nSCORE_MODE: %s\n", CL->genepred_score);
- for (a=0; a<ncycle && CL->ne>min_ne; a++)
- {
- if (w);free_int (w, -1);
- w=list2residue_total_weight (CL);
- PS=dnaseq2geneseq (S, w);
- nsd=(float)(genepred2sd(PS)/genepred2avg(PS));
- if ( strm (CL->genepred_score, "nsd"))score=nsd;
- else score=1-seq_genepred2acc (S, PS, CL->genepred_score);
- fprintf ( stderr, "Cycle: %2d LIB: %6d AVG LENGTH: %6.2f NSD: %8.2f SCORE: %.2f ",a+1,CL->ne,(float) genepred2avg(PS),nsd,score);
-
- vfree (display_accuracy (genepred_seq2accuracy_counts (S, PS, NULL),stderr));
- pred_list[a]=vtmpnam(NULL);
- weight_list[a]=vtmpnam(NULL);
- output_fasta_seqS(pred_list[a],PS);
- output_array_int (weight_list[a],w);
- free_sequence (PS, -1);
-
- if (a==0 || score<best_score)
- {
- best_score=score;
- best_cycle=a;
- }
- CL=relax_constraint_list_4gp(CL);
- }
- if (w)free_int(w,-1);
- S=get_fasta_sequence (pred_list[best_cycle], NULL);
-
- fprintf ( stderr, "\nSelected Cycle: %d (SCORE=%.4f)----> ", best_cycle+1, best_score);
- vfree (display_accuracy (genepred_seq2accuracy_counts ((CL->S), S, NULL),stderr));
-
- genepred_seq2accuracy_counts4all((CL->S), S);
-
- IN=seq2aln (S,NULL, 1);
- IN->score_res=input_array_int (weight_list[best_cycle]);
- return IN;
-}
-
-double seq_genepred2acc (Sequence *S, Sequence *PS, char *name)
-{
- float *count;
- float *acc;
- float r;
- int ref, target;
-
- if ( strm (name, "best"))return genepred2acc (S, PS);
-
- ref=name_is_in_list (name, S->name, S->nseq, 100);
- target=name_is_in_list (name, PS->name, PS->nseq, 100);
-
- if ( target==-1 || ref==-1)
- {
- printf_exit (EXIT_FAILURE,stderr, "\nERROR: %s is not a valid sequence", name);
- }
- count=genepred2accuracy_counts (S->seq[ref],PS->seq[target],NULL);
- acc=counts2accuracy (count);
-
- r=acc[3];
- vfree (acc);
- vfree (count);
- return r;
-}
-
-
-
-double genepred2acc (Sequence *S, Sequence *PS)
-{
- float *count;
- float *acc;
- float r;
- count=genepred_seq2accuracy_counts (S, PS, NULL);
- acc=counts2accuracy (count);
-
- r=acc[3];
- vfree (acc);
- vfree (count);
- return r;
-}
-
-double genepred2sd (Sequence *S)
-{
- double sum=0, sum2=0;
- int a, b;
- int *len;
- len=genepred2orf_len (S);
-
- for (a=0; a<S->nseq; a++)
- {
- sum+=(double)len[a];
- sum2+=(double)len[a]*(double)len[a];
- }
- sum/=(double)S->nseq;
- sum2/=S->nseq;
- vfree (len);
- return sqrt (sum2-(sum*sum));
-}
-double genepred2avg (Sequence *S)
-{
- int a, b;
- double avg=0;
- int *len;
- len=genepred2orf_len (S);
-
- for (a=0; a<S->nseq; a++)avg+=len[a];
- vfree (len);
- return avg/(double)S->nseq;
-}
-double genepred2zsum2 (Sequence *S)
-{
- double sum=0, sum2=0, zscore=0, zsum2=0;
- int a, b;
- int *len;
- double sd, avg;
- sd=genepred2sd(S);
- avg=genepred2avg (S);
-
- len=genepred2orf_len (S);
- for (a=0; a<S->nseq; a++)
- {
- zscore=((double)len[a]-avg)/sd;
- zsum2+=FABS(zscore);
- }
- zsum2/=(float)S->nseq;
- vfree (len);
- return zsum2;
-}
-int *genepred2orf_len (Sequence *S)
-{
- int a,b, *len;
- len=vcalloc (S->nseq, sizeof (int));
- for (a=0; a<S->nseq; a++)
- for (b=0; b<S->len[a]; b++)
- len[a]+=(isupper(S->seq[a][b]));
- return len;
-}
-
-Alignment *coffee_seq_evaluate_output_old2 ( Alignment *IN, Constraint_list *CL)
-{
- int **w, a,b,c,l,i;
- int avg, min_avg, best_cycle;
- char **pred_list; FILE *fp;
- Sequence *S, *PS;
- int ncycle=100;
- w=NULL;
- pred_list=declare_char (ncycle, 100);
- S=CL->S;
-
- //CL=expand_constraint_list_4gp(CL);
- min_avg=constraint_list2avg(CL);
-
- for (a=1; a<ncycle && CL->ne>0; a++)
- {
- int t;
- if (w);free_int (w, -1);
- w=list2residue_total_weight (CL);
- CL=relax_constraint_list_4gp (CL);
- fprintf (stderr,"\nRELAX CYCLE: %2d AVG: %5d [%10d] ", a, avg=constraint_list2avg(CL), CL->ne);
- for (b=0; b<S->nseq; b++)for (c=0; c<S->len[b]; c++)w[b][c]-=1;
-
- //rescale nuclotide coding weights
-
- PS=dnaseq2geneseq (S, w);
- vfree (display_accuracy (genepred_seq2accuracy_counts (S, PS, NULL),stderr));
-
-
- free_sequence (PS, PS->nseq);
- if ( avg<min_avg)
- {
- min_avg=avg;
- best_cycle=a;
- }
- }
-
- S=get_fasta_sequence (pred_list[best_cycle], NULL);
- vfree (display_accuracy (genepred_seq2accuracy_counts ((CL->S), S, NULL),stderr));myexit (0);
- for (a=0; a<S->nseq; a++)
- HERE (">%s\n%s", S->name[a], S->seq[a]);
- myexit (0);
- return seq2aln (S,NULL, 1);
-}
-
-
-
-Alignment * non_extended_t_coffee_evaluate_output ( Alignment *IN,Constraint_list *CL)
- {
- int a,b, c,res, s1, s2, r1, r2;
- Alignment *OUT=NULL;
- int **pos;
- int max_score_r, score_r;
- double score_col=0, score_aln=0;
- int max_score_col, max_score_aln;
- double *max_score_seq, *score_seq;
- int local_nseq;
- int **tot_non_extended_weight;
- int **res_non_extended_weight;
- int *l;
- CLIST_TYPE *entry=NULL;
- int p;
- int max_score=0;
-
- entry=vcalloc (CL->entry_len+1, CL->el_size);
- if ( !CL->evaluate_residue_pair){fprintf ( stderr, "\nWARNING: CL->evaluate_residue_pair Not set\nSet to: extend_residue_pair\n");CL->evaluate_residue_pair= extend_residue_pair; }
-
- OUT=copy_aln (IN, OUT);
- pos=aln2pos_simple(IN, IN->nseq);
-
-
- max_score_seq=vcalloc ( IN->nseq, sizeof (double));
- score_seq=vcalloc ( IN->nseq, sizeof (double));
-
- tot_non_extended_weight=list2residue_total_weight(CL);
- res_non_extended_weight=declare_int ((CL->S)->nseq, (CL->S)->max_len+1);
-
- for (a=0; a< IN->len_aln; a++)
- {
- for ( b=0; b< IN->nseq-1; b++)
- {
- s1=IN->order[b][0];
- r1=pos[b][a];
- for ( c=b+1; c< IN->nseq; c++)
- {
- s2=IN->order[c][0];
- r2=pos[c][a];
- if ( s1==s2 && !CL->do_self)continue;
- else if ( r1<=0 || r2<=0) continue;
- else
- {
- entry[SEQ1]=s1;
- entry[SEQ2]=s2;
- entry[R1]=r1;
- entry[R2]=r2;
- if ((l=main_search_in_list_constraint (entry,&p,4,CL))!=NULL)
- {
- res_non_extended_weight[s1][r1]+=l[WE];
- res_non_extended_weight[s2][r2]+=l[WE];
- }
- entry[SEQ1]=s2;
- entry[SEQ2]=s1;
- entry[R1]=r2;
- entry[R2]=r1;
- if ((l=main_search_in_list_constraint (entry,&p,4,CL))!=NULL)
- {
- res_non_extended_weight[s1][r1]+=l[WE];
- res_non_extended_weight[s2][r2]+=l[WE];
- }
- max_score=MAX(max_score,res_non_extended_weight[s1][r1]);
- max_score=MAX(max_score,res_non_extended_weight[s2][r2]);
-
- }
- }
- }
- }
-
- sprintf ( OUT->name[IN->nseq], "cons");
- for ( max_score_aln=0,score_aln=0,a=0; a< IN->len_aln; a++)
- {
- OUT->seq_al[IN->nseq][a]=NO_COLOR_RESIDUE;
- for ( local_nseq=0,b=0; b<IN->nseq; b++){local_nseq+=(pos[b][a]>0)?1:0;}
-
- for ( max_score_col=0, score_col=0,b=0; b< IN->nseq; b++)
- {
- OUT->seq_al[b][a]=NO_COLOR_RESIDUE;
- s1=IN->order[b][0];
- r1=pos[b][a];
- if (r1<=0)continue;
- else
- {
- max_score_r =max_score;/*tot_non_extended_weight[s1][r1];*/
- score_r=res_non_extended_weight[s1][r1];
- res=(max_score_r==0 || local_nseq<2 )?NO_COLOR_RESIDUE:((score_r*10)/max_score_r);
-
- (OUT)->seq_al[b][a]=(res==NO_COLOR_RESIDUE)?res:(MIN(res, 9));
- max_score_col+=max_score_r;
- score_col+=score_r;
- max_score_seq[b]+=max_score_r;
- score_seq[b]+=score_r;
- max_score_aln+=max_score_r;
- score_aln+=score_r;
- }
- res=(max_score_col==0 || local_nseq<2)?NO_COLOR_RESIDUE:((score_col*10)/max_score_col);
- OUT->seq_al[IN->nseq][a]=(res==NO_COLOR_RESIDUE)?res:(MIN(res,9));
- }
- }
- IN->score_aln=OUT->score_aln=(max_score_aln==0)?0:((score_aln*100)/max_score_aln);
- for ( a=0; a< OUT->nseq; a++)
- {
- OUT->score_seq[a]=(max_score_seq[a]==0)?0:((score_seq[a]*100)/max_score_seq[a]);
- OUT->score_seq[a]=(OUT->score_seq[a]>100)?100:OUT->score_seq[a];
- }
- OUT->score_aln=(OUT->score_aln>100)?100:OUT->score_aln;
-
- vfree ( score_seq);
- vfree ( max_score_seq);
-
- free_int (tot_non_extended_weight, -1);
- free_int (res_non_extended_weight, -1);
- vfree(entry);
- free_int (pos, -1);
-
- return OUT;
- }
-
-
-/*********************************************************************************************/
-/* */
-/* PROFILE/PRofile Functions */
-/* */
-/*********************************************************************************************/
-int channel_profile_profile (int *prf1, int *prf2, Constraint_list *CL);
-
-Profile_cost_func get_profile_mode_function (char *name, Profile_cost_func func)
-{
- int a;
- static int nfunc;
- static Profile_cost_func *flist;
- static char **nlist;
-
-
-
- /*The first time: initialize the list of pairwse functions*/
- /*If func==NULL:REturns a pointer to the function associated with a name*/
- /*If name is empty:Prints the name of the function associated with name*/
-
- if ( nfunc==0)
- {
- flist=vcalloc ( 100, sizeof (Pwfunc));
- nlist=declare_char (100, 100);
-
- flist[nfunc]=cw_profile_profile;
- sprintf (nlist[nfunc], "cw_profile_profile");
- nfunc++;
-
- flist[nfunc]=muscle_profile_profile;
- sprintf (nlist[nfunc], "muscle_profile_profile");
- nfunc++;
-
- flist[nfunc]=channel_profile_profile;
- sprintf (nlist[nfunc], "channel_profile_profile");
- nfunc++;
- }
-
- for ( a=0; a<nfunc; a++)
- {
- if ( (name && strm (nlist[a],name)) || flist[a]==func)
- {
- if (name)sprintf (name,"%s", nlist[a]);
- return flist[a];
- }
- }
- fprintf ( stderr, "\n[%s] is an unknown profile_profile function[FATAL:%s]\n",name, PROGRAM);
- crash ( "\n");
- return NULL;
- }
-
-int generic_evaluate_profile_score (Constraint_list *CL,Alignment *Profile1,int s1, int r1, Alignment *Profile2,int s2, int r2, Profile_cost_func prf_prf)
- {
- int *prf1, *prf2;
- static int *dummy;
- int score;
-
-
- /*Generic profile function*/
- if( !dummy)
- {
- dummy=vcalloc (10, sizeof(int));
- dummy[0]=1;/*Number of Amino acid types on colum*/
- dummy[1]=5;/*Length of Dummy*/
- dummy[3]='\0';/*Amino acid*/
- dummy[4]=1; /*Number of occurences*/
- dummy[5]=100; /*Frequency in the MSA column*/
-
- }
-
- if (r1>0 && r2>0)
- {
- r1--;
- r2--;
-
- prf1=(Profile1)?(Profile1->P)->count2[r1]:NULL;
- prf2=(Profile2)?(Profile2->P)->count2[r2]:NULL;
-
- if (!prf1) {prf1=dummy; prf1[3]=(CL->S)->seq[s1][r1];}
- else if (!prf2){prf2=dummy; prf2[3]=(CL->S)->seq[s2][r2];}
-
- score=((prf_prf==NULL)?cw_profile_profile:prf_prf) (prf1, prf2, CL);
- return score;
- }
- else
- return 0;
- }
-
-int cw_profile_profile_count (int *prf1, int *prf2, Constraint_list *CL)
- {
- /*see function aln2count2 for prf structure*/
- int a, b, n;
- int res1, res2;
- double score=0;
-
-
- for ( n=0,a=3; a<prf1[1]; a+=3)
- for ( b=3; b<prf2[1]; b+=3)
- {
-
- res1=prf1[a];
- res2=prf2[b];
-
- score+=prf1[a+1]*prf2[b+1]*CL->M[res1-'A'][res2-'A'];
- n+=prf1[a+1]*prf2[b+1];
- }
-
-
- score=(score*SCORE_K)/n;
- return score;
- }
-int muscle_profile_profile (int *prf1, int *prf2, Constraint_list *CL)
- {
- /*see function aln2count2 for prf structure*/
- int a, b;
- int res1, res2;
- double score=0, fg1, fg2, fi, fj, m;
- static double *exp_lu;
- if (exp_lu==NULL)
- {
- exp_lu=vcalloc ( 10000, sizeof (double));
- exp_lu+=2000;
- for ( a=-1000; a<1000; a++)
- exp_lu[a]=exp((double)a);
- }
-
-
-
- for (a=3; a<prf1[1]; a+=3)
- {
- res1=prf1[a];
- fi=(double)prf1[a+2]/100;
-
- for ( b=3; b<prf2[1]; b+=3)
- {
- res2=prf2[b];
- fj=(double)prf2[b+2]/100;
- /*m=exp((double)CL->M[res1-'A'][res2-'A']);*/
- m=exp_lu[CL->M[res1-'A'][res2-'A']];
- score+=m*fi*fj;
- }
- }
-
- fg1=(double)prf1[2]/100;
- fg2=(double)prf2[2]/100;
- score=(score==0)?0:log(score)*(1-fg1)*(1-fg2);
- score=(score*SCORE_K);
- /*if ( score<-100)fprintf ( stderr, "\nSCORE %d %d", (int)score, cw_profile_profile(prf1, prf2, CL));*/
-
- return (int)score;
- }
-int cw_profile_profile (int *prf1, int *prf2, Constraint_list *CL)
- {
- /*see function aln2count2 for prf structure*/
- int a, b, n,p;
- int res1, res2;
- double score=0;
-
-
- for ( n=0,a=3; a<prf1[1]; a+=3)
- for ( b=3; b<prf2[1]; b+=3)
- {
-
- res1=prf1[a];
- res2=prf2[b];
- p=prf1[a+1]*prf2[b+1];
-
- n+=p;
- score+=p*CL->M[res1-'A'][res2-'A'];
- }
- score=(score*SCORE_K)/((double)(n==0)?1:n);
- return score;
- }
-int cw_profile_profile_old (int *prf1, int *prf2, Constraint_list *CL)
- {
- /*see function aln2count2 for prf structure*/
- int a, b, n,p;
- int res1, res2;
- double score=0;
-
-
-
- for ( n=0,a=3; a<prf1[1]; a+=3)
- for ( b=3; b<prf2[1]; b+=3)
- {
-
- res1=prf1[a];
- res2=prf2[b];
- p=prf1[a+1]*prf2[b+1];
-
- n+=p;
- score+=p*CL->M[res1-'A'][res2-'A'];
- }
- score=(score*SCORE_K)/((double)(n==0)?1:n);
- return score;
- }
-int channel_profile_profile ( int *prf1, int *prf2, Constraint_list *CL)
-{
-
- int score=0;
-
- prf1+=prf1[1];
- prf2+=prf2[1];
-
-
- if (prf1[0]!=prf1[0]){fprintf ( stderr, "\nERROR: Inconsistent number of channels [channel_profile_profile::FATAL%s]", PROGRAM);}
- else
- {
- int a, n;
- for (a=1, n=0; a<=prf1[0]; a++)
- {
- if (prf1[a]>0 && prf2[a]>0)
- {
- n++;score+=CL->M[prf1[a]-'A'][prf2[a]-'A'];
-
- }
- }
-
- if ( n==0)return 0;
-
- score=(n==0)?0:(score*SCORE_K)/n;
-
- }
- return score;
-}
-
-/*********************************************************************************************/
-/* */
-/* FUNCTIONS FOR GETING THE COST : (Sequences) ->evaluate_residue_pair */
-/* */
-/*********************************************************************************************/
-int initialize_scoring_scheme (Constraint_list *CL)
-{
- if (!CL) return 0;
- else if (!CL->evaluate_residue_pair)return 0;
- else if ( !CL->S) return 0;
- else if ( (CL->S)->nseq<2) return 0;
- else if ( strlen ((CL->S)->seq[0])==0)return 0;
- else if ( strlen ((CL->S)->seq[1])==0)return 0;
- else
- {
- //CL->evaluate_residue_pair (CL,13,1,16,1);
- CL->evaluate_residue_pair (CL,0,1,1,1);
-
- }
- return 1;
-}
-
-int evaluate_blast_profile_score (Constraint_list *CL, int s1, int r1, int s2, int r2)
-{
- Alignment *PRF1;
- Alignment *PRF2;
-
-
- PRF1=(Alignment*)atop(seq2T_value (CL->S, s1, "A", "_RB_"));
- PRF2=(Alignment*)atop(seq2T_value (CL->S, s2, "A", "_RB_"));
-
- return generic_evaluate_profile_score (CL,PRF1,s1, r1, PRF2,s2, r2, CL->profile_mode);
-}
-
-int evaluate_aln_profile_score (Constraint_list *CL, int s1, int r1, int s2, int r2)
-{
-
- return generic_evaluate_profile_score (CL,seq2R_template_profile((CL->S),s1),s1, r1, seq2R_template_profile(CL->S,s2),s2, r2, CL->profile_mode);
-}
-
-
-int evaluate_profile_score (Constraint_list *CL,Alignment *Prf1, int s1, int r1, Alignment *Prf2,int s2, int r2)
-{
-
- return generic_evaluate_profile_score (CL, Prf1, s1,r1,Prf2, s2,r2,CL->profile_mode);
-}
-
-int evaluate_cdna_matrix_score (Constraint_list *CL, int s1, int r1, int s2, int r2)
- {
- char a1, a2;
-
- if (r1>0 && r2>0)
- {
- r1--;
- r2--;
-
- a1=translate_dna_codon((CL->S)->seq[s1]+r1,'x');
- a2=translate_dna_codon((CL->S)->seq[s2]+r2,'x');
-
-
-
- if (a1=='x' || a2=='x')return 0;
- else return CL->M[a1-'A'][a2-'A']*SCORE_K;
- }
- else
- {
- return 0;
- }
- }
-int evaluate_physico_score ( Constraint_list *CL, int s1, int r1, int s2, int r2)
-{
- int a, b, p;
- double tot;
- static float **prop_table;
- static int n_prop;
- static double max;
- if (r1<0 || r2<0)return 0;
- if ( !prop_table)
- {
- prop_table= initialise_aa_physico_chemical_property_table(&n_prop);
- for (p=0; p< n_prop; p++)max+=100;
- max=sqrt(max);
- }
- a=tolower (( CL->S)->seq[s1][r1]);
- b=tolower (( CL->S)->seq[s2][r2]);
-
- for (tot=0,p=0; p< n_prop; p++)
- {
- tot+=(double)(prop_table[p][a]-prop_table[p][b])*(prop_table[p][a]-prop_table[p][b]);
- }
-
- tot=(sqrt(tot)/max)*10;
-
- return (int) tot*SCORE_K;
-}
-
-
-
-int evaluate_diaa_matrix_score ( Constraint_list *CL, int s1, int r1, int s2, int r2)
- {
-
- static int ****m;
- static int *alp;
-
- if (m==NULL)
- {
- FILE *fp;
- char k1[2], k2[2];
- int v1, v2, c;
- char *buf=NULL;
- int a;
-
- m=declare_arrayN(4, sizeof (int), 26, 26, 26, 26);
- fp=vfopen ("diaa_mat.mat", "r");
- while ((c=fgetc (fp))!=EOF)
- {
-
- ungetc (c, fp);
- buf=vfgets(buf, fp);
-
- if (c=='#');
- else
- {
- sscanf (buf, "%s %s %d %d", k1, k2, &v1, &v2);
-
- m[k1[0]-'a'][k1[1]-'a'][k2[0]-'a'][k2[1]-'a']=v1;
- m[k2[0]-'a'][k2[1]-'a'][k1[0]-'a'][k1[1]-'a']=v1;
- }
- }
- vfclose (fp);
- alp=vcalloc (256, sizeof (int));
- for (a=0; a<26; a++)alp[a+'a']=1;
- alp['b']=0;
- alp['j']=0;
- alp['o']=0;
- alp['u']=0;
- alp['x']=0;
- alp['z']=0;
- }
-
-
- if (r1>0 && r2>0)
- {
- int s=0, n=0;
- char aa1, aa2, aa3, aa4, u;
-
- r1--;
- r2--;
-
- if (r1>0 && r2>0)
- {
- aa1=tolower((CL->S)->seq[s1][r1-1]);
- aa2=tolower((CL->S)->seq[s1][r1]);
- aa3=tolower((CL->S)->seq[s2][r2-1]);
- aa4=tolower((CL->S)->seq[s2][r2]);
- u=alp[(int)aa1];u+=alp[(int)aa2];u+=alp[(int)aa3];u+=alp[(int)aa4];
- if (u==4)
- {
- s+=m[aa1-'a'][aa2-'a'][aa3-'a'][aa4-'a'];
- n++;
- }
- }
-
- aa1=tolower((CL->S)->seq[s1][r1]);
- aa2=tolower((CL->S)->seq[s1][r1+1]);
- aa3=tolower((CL->S)->seq[s2][r2]);
- aa4=tolower((CL->S)->seq[s2][r2+1]);
- u=alp[(int)aa1];u+=alp[(int)aa2];u+=alp[(int)aa3];u+=alp[(int)aa4];
- if (u==4)
- {
- s+=m[aa1-'a'][aa2-'a'][aa3-'a'][aa4-'a'];
- n++;
- }
- if (n)return (s*SCORE_K)/n;
- else return 0;
- }
- return 0;}
-int evaluate_monoaa_matrix_score ( Constraint_list *CL, int s1, int r1, int s2, int r2)
- {
-
- static int **m;
- static int *alp;
-
- if (m==NULL)
- {
- FILE *fp;
- char k1[2], k2[2];
- int v1, v2, c;
- char *buf=NULL;
- int a;
-
- m=declare_arrayN(2, sizeof (int), 26, 26);
- fp=vfopen ("monoaa_mat.mat", "r");
- while ((c=fgetc (fp))!=EOF)
- {
-
- ungetc (c, fp);
- buf=vfgets(buf, fp);
-
- if (c=='#');
- else
- {
- sscanf (buf, "%s %s %d %d", k1, k2, &v1, &v2);
-
- m[k1[0]-'a'][k2[0]-'a']=v1;
- m[k2[0]-'a'][k1[0]-'a']=v1;
- }
- }
- vfclose (fp);
- alp=vcalloc (256, sizeof (int));
- for (a=0; a<26; a++)alp[a+'a']=1;
- alp['b']=0;
- alp['j']=0;
- alp['o']=0;
- alp['u']=0;
- alp['x']=0;
- alp['z']=0;
- }
-
-
- if (r1>0 && r2>0)
- {
- int s=0, n=0;
- char aa1, aa3, u;
-
- r1--;
- r2--;
-
- if (r1>0 && r2>0)
- {
- aa1=tolower((CL->S)->seq[s1][r1]);
- aa3=tolower((CL->S)->seq[s2][r2]);
- u=alp[(int)aa1];u+=alp[(int)aa3];
- if (u==2)
- {
- s+=m[aa1-'a'][aa3-'a'];
- n++;
- }
- }
-
- if (n)return (s*SCORE_K)/n;
- else return 0;
- }
- return 0;}
-int evaluate_matrix_score ( Constraint_list *CL, int s1, int r1, int s2, int r2)
- {
-
- if ( seq2R_template_profile (CL->S,s1) ||seq2R_template_profile (CL->S,s2))
- {
- return evaluate_aln_profile_score ( CL, s1,r1, s2, r2);
- }
-
- if (r1>0 && r2>0)
- {
- r1--;
- r2--;
-
- return CL->M[(CL->S)->seq[s1][r1]-'A'][(CL->S)->seq[s2][r2]-'A']*SCORE_K;
- }
- else
- return 0;
- }
-int *get_curvature ( int s1, Constraint_list *CL);
-int evaluate_curvature_score( Constraint_list *CL, int s1, int r1, int s2, int r2)
-{
- static int **st;
- int score;
- CL->gop=0;
- CL->gep=0;
-
- if (!st) st= vcalloc ((CL->S)->nseq, sizeof (char*));
- if (!st[s1])
- {
- st[s1]=get_curvature (s1, CL);
- }
- if (!st[s2])
- {
- st[s2]=get_curvature (s2, CL);
- }
-
- if (r1>0 && r2>0)
- {
- char p1, p2;
-
- r1--;
- r2--;
-
- p1=st[s1][r1];
- p2=st[s2][r2];
-
- score=p1-p2;
- score=FABS(score);
- score=20-score;
- //HERE ("%d", score);
- //if (score<0)HERE ("%d", score);
- return score;
- }
- else
- {
- return 0;
- }
-
-}
-int *get_curvature ( int s1, Constraint_list *CL)
-{
- int *array, n=0, a;
- char c;
- FILE *fp;
- char name [1000], b1[100], b2[100];
- float f;
- sprintf ( name, "%s.curvature", (CL->S)->name[s1]);
- array=vcalloc (strlen ((CL->S)->seq[s1]), sizeof (int));
- fp=vfopen ( name, "r");
- while ( fscanf (fp, "%s %d %c %f\n",b1, &a, &c,&f )==4)
- {
- if ( c!=(CL->S)->seq[s1][n]){HERE ("ERROR: %c %c", c,(CL->S)->seq[s1][n] );myexit (0);}
- else array[n++]=(int)(float)100*(float)f;
- }
- vfclose (fp);
- return array;
-}
-int evaluate_tm_matrix_score ( Constraint_list *CL, int s1, int r1, int s2, int r2)
-{
- static char **st;
- float F, RF;
-
- if (!st)
- {
- st= vcalloc ((CL->S)->nseq, sizeof (char*));
- RF=atoigetenv ("TM_FACTOR_4_TCOFFEE");
- if ( !RF)RF=10;
- }
-
- if (!st[s1])st[s1]=seq2T_template_string((CL->S),s1);
- if (!st[s2])st[s2]=seq2T_template_string((CL->S),s2);
-
- if (r1<=0 || r2<=0)return 0;
- else if (st[s1] && st[s2] && (st[s1][r1-1]==st[s2][r2-1]))F=RF;
- else if (st[s1] && st[s2] && (st[s1][r1-1]!=st[s2][r2-1]))F=0;
- else F=0;
-
- return (int)(evaluate_matrix_score (CL, s1, r1, s2, r2))+F;
-}
-int evaluate_ssp_matrix_score ( Constraint_list *CL, int s1, int r1, int s2, int r2)
-{
- static char **st;
- float F, RF;
-
- if (!st)
- {
- st= vcalloc ((CL->S)->nseq, sizeof (char*));
- RF=atoigetenv ("SSP_FACTOR_4_TCOFFEE");
- if ( !RF)RF=10;
- }
-
- if (!st[s1])st[s1]=seq2T_template_string((CL->S),s1);
- if (!st[s2])st[s2]=seq2T_template_string((CL->S),s2);
-
- if (r1<=0 || r2<=0)return 0;
- else if (st[s1] && st[s2] && (st[s1][r1-1]==st[s2][r2-1]))F=RF;
- else if (st[s1] && st[s2] && (st[s1][r1-1]!=st[s2][r2-1]))F=0;
- else F=0;
-
- return (int)(evaluate_matrix_score (CL, s1, r1, s2, r2))+F;
-}
-
-
-int evaluate_combined_matrix_score ( Constraint_list *CL, int s1, int r1, int s2, int r2)
- {
- /*
- function documentation: start
-
- int evaluate_matrix_score ( Constraint_list *CL, int s1, int s2, int r1, int r2)
-
- this function evaluates the score for matching residue S1(r1) wit residue S2(r2)
- using Matrix CL->M;
-
- function documentation: end
- */
-
- if ( seq2R_template_profile (CL->S,s1) ||seq2R_template_profile (CL->S,s2))
- {
- return evaluate_aln_profile_score ( CL, s1,r1, s2, r2);
- }
-
- if (r1>0 && r2>0)
- {
- r1--;
- r2--;
- if (r1==0 || r2==0)return CL->M[(CL->S)->seq[s1][r1]-'A'][(CL->S)->seq[s2][r2]-'A']*SCORE_K;
- else
- {
- int A1, A2, B1, B2, a2, b2;
- int score;
-
- A1=toupper((CL->S)->seq[s1][r1-1]);
- A2=toupper((CL->S)->seq[s1][r1]);
- B1=toupper((CL->S)->seq[s2][r2-1]);
- B2=toupper((CL->S)->seq[s2][r2]);
-
- a2=tolower(A2);
- b2=tolower(B2);
- A1-='A';A2-='A';B1-='A'; B2-='A';a2-='A';b2-='A';
-
- score=CL->M[a2][b2]-FABS((CL->M[A1][A2])-(CL->M[B1][B2]));
- score*=SCORE_K;
- return score;
- }
- }
- else
- return 0;
- }
-
-int residue_pair_non_extended_list ( Constraint_list *CL, int s1, int r1, int s2, int r2 )
- {
-
- /*
- This is the generic Function->works with everything
-
- int residue_pair_non_extended_list ( Constraint_list *CL, int s1, int r1, int s2, int r2, int field );
-
- Computes the non extended score for aligning residue seq1(r1) Vs seq2(r2)
-
- This function can compare a sequence with itself.
-
- Associated functions: See util constraint list, list extention functions.
- function documentation: end
- */
-
- int p;
-
-
- static int *entry;
- int *r;
- int field;
-
- field = CL->weight_field;
-
-
- if ( r1<=0 || r2<=0)return 0;
- else if ( !CL->extend_jit)
- {
- if ( !entry) entry=vcalloc (LIST_N_FIELDS , sizeof (int));
- entry[SEQ1]=s1;
- entry[SEQ2]=s2;
- entry[R1]=r1;
- entry[R2]=r2;
- if ( r1==r2 && s1==s2) return UNDEFINED;
- r=main_search_in_list_constraint( entry,&p,4,CL);
- if (r==NULL)return 0;
- else return r[field]*SCORE_K;
- }
- else
- return UNDEFINED;/*ERROR*/
-
-
- }
-
-
-
-int residue_pair_extended_list_mixt (Constraint_list *CL, int s1, int r1, int s2, int r2 )
- {
- int score=0;
-
- score+= residue_pair_extended_list_quadruplet(CL, s1, r1, s2, r2);
- score+= residue_pair_extended_list (CL, s1, r1, s2, r2);
-
- return score*SCORE_K;
- }
-
-int residue_pair_extended_list_quadruplet (Constraint_list *CL, int s1, int r1, int s2, int r2 )
- {
- double score=0;
-
- int t_s, t_r, t_w, q_s, q_r, q_w;
- int a, b;
- static int **hasch;
-
- int field;
- /* This measure the quadruplets cost on a pair of residues*/
-
-
-
- field=CL->weight_field;
-
- if ( r1<=0 || r2<=0)return 0;
- if ( !hasch)
- {
- hasch=vcalloc ( (CL->S)->nseq, sizeof (int*));
- for ( a=0; a< (CL->S)->nseq; a++)hasch[a]=vcalloc ( (CL->S)->len[a]+1, sizeof (int));
- }
-
-
- hasch[s1][r1]=100000;
- for (a=1; a< CL->residue_index[s1][r1][0]; a+=ICHUNK)
- {
- t_s=CL->residue_index[s1][r1][a+SEQ2];
- t_r=CL->residue_index[s1][r1][a+R2];
- t_w=CL->residue_index[s1][r1][a+WE];
- if ( CL->seq_for_quadruplet[t_s])
- {
- for ( b=1; b<CL->residue_index[t_s][t_r][0]; b+=ICHUNK)
- {
- q_s=CL->residue_index[t_s][t_r][b+SEQ2];
- q_r=CL->residue_index[t_s][t_r][b+R2];
- q_w=CL->residue_index[t_s][t_r][b+WE];
- if (CL-> seq_for_quadruplet[q_s])
- hasch[q_s][q_r]=MIN(q_w,t_w);
-
- }
- }
- }
-
-
- for (a=1; a< CL->residue_index[s2][r2][0]; a+=ICHUNK)
- {
- t_s=CL->residue_index[s2][r2][a+SEQ2];
- t_r=CL->residue_index[s2][r2][a+R2];
- t_w=CL->residue_index[s2][r2][a+WE];
- if ( CL->seq_for_quadruplet[t_s])
- {
- for ( b=1; b<CL->residue_index[t_s][t_r][0]; b+=ICHUNK)
- {
- q_s=CL->residue_index[t_s][t_r][b+SEQ2];
- q_r=CL->residue_index[t_s][t_r][b+R2];
- q_w=CL->residue_index[t_s][t_r][b+WE];
- if (hasch[q_s][q_r] && CL->seq_for_quadruplet[q_s])
- score+=MIN(hasch[q_s][q_r],MIN(q_w,t_w));
- }
- }
- }
-
- score=(CL->normalise)?((score*CL->normalise)/CL->max_ext_value):score;
-
- for (a=1; a< CL->residue_index[s1][r1][0]; a+=ICHUNK)
- {
- t_s=CL->residue_index[s1][r1][a+SEQ2];
- t_r=CL->residue_index[s1][r1][a+R2];
- t_w=CL->residue_index[s1][r1][a+WE];
- if ( CL->seq_for_quadruplet[t_s])
- {
- for ( b=1; b<CL->residue_index[t_s][t_r][0]; b+=ICHUNK)
- {
- q_s=CL->residue_index[t_s][t_r][b+SEQ2];
- q_r=CL->residue_index[t_s][t_r][b+R2];
- hasch[q_s][q_r]=0;
- }
- }
- }
-
- return (int)(score*SCORE_K);
- }
-
-
-Constraint_list * R_extension ( Constraint_list *CL, Constraint_list *R);
-int residue_pair_extended_list4rna4 (Constraint_list *CL,int s1, int r1, int s2, int r2 )
-{
- static int rna_lib;
-
- if (!rna_lib)
- {
- sprintf ( CL->rna_lib, "%s", seq2rna_lib (CL->S, NULL));
- rna_lib=1;
- }
- return residue_pair_extended_list4rna2 (CL, s1, r1, s2,r2);
-}
-int residue_pair_extended_list4rna1 (Constraint_list *CL, int s1, int r1, int s2, int r2 )
-{
- static Constraint_list *R;
- if (!R)R=read_rna_lib (CL->S, CL->rna_lib);
- return residue_pair_extended_list4rna (CL, R, s1, r1, s2, r2);
-}
-
-int residue_pair_extended_list4rna3 (Constraint_list *CL,int s1, int r1, int s2, int r2 )
-{
- static Constraint_list *R;
- if (!R)
- {
- R=read_rna_lib (CL->S, CL->rna_lib);
- rna_lib_extension (CL,R);
- }
- return residue_pair_extended_list (CL, s1,r1, s2,r2);
-}
-
-int residue_pair_extended_list4rna2 (Constraint_list *CL,int s1, int r1, int s2, int r2 )
-{
- static Constraint_list *R;
-
-
- if (!R)
- {
-
- R=read_rna_lib (CL->S, CL->rna_lib);
- rna_lib_extension (CL,R);
-
- }
-
- return residue_pair_extended_list4rna (CL, R, s1, r1, s2, r2);
-}
-int residue_pair_extended_list4rna ( Constraint_list *CL,Constraint_list *R, int s1, int r1, int s2, int r2 )
-{
-
- int a, b, n1, n2;
- int list1[100];
- int list2[100];
- int score=0, score2;
-
-
-
- if ( r1<0 || r2<0)return 0;
- n1=n2=0;
-
- list1[n1++]=r1;
- for (a=1; a<R->residue_index[s1][r1][0]; a+=ICHUNK)
- {
- list1[n1++]=R->residue_index[s1][r1][a+R2];
- }
-
-
- list2[n2++]=r2;
- for (a=1; a<R->residue_index[s2][r2][0]; a+=ICHUNK)
- {
- list2[n2++]=R->residue_index[s2][r2][a+R2];
- }
-
-
- score=residue_pair_extended_list ( CL, s1,list1[0], s2,list2[0]);
-
- for (score2=0,a=1; a<n1; a++)
- for ( b=1; b<n2; b++)
- score2=MAX(residue_pair_extended_list ( CL, s1,list1[a], s2,list2[b]), score2);
-
- if ( n1==1 || n2==1);
- else score=MAX(score,score2);
-
- return score;
-}
-
-
-int residue_pair_extended_list4rna_ref ( Constraint_list *CL, int s1, int r1, int s2, int r2 )
-{
- static Constraint_list *R;
- int a, b, n1, n2;
- int list1[100];
- int list2[100];
- int score=0, score2;
-
- if (R==NULL)
- {
- char **list;
- int n,a;
- list=read_lib_list ( CL->rna_lib, &n);
-
- R=declare_constraint_list ( CL->S,NULL, NULL, 0,NULL, NULL);
-
- for (a=0; a< n; a++)
- {
- R=read_constraint_list_file (R, list[a]);
- }
-
- }
-
- if ( r1<0 || r2<0)return 0;
- n1=n2=0;
-
- list1[n1++]=r1;
- for (a=1; a<R->residue_index[s1][r1][0]; a+=ICHUNK)
- {
- list1[n1++]=R->residue_index[s1][r1][a+R2];
- }
-
-
- list2[n2++]=r2;
- for (a=1; a<R->residue_index[s2][r2][0]; a+=ICHUNK)
- {
- list2[n2++]=R->residue_index[s2][r2][a+R2];
- }
-
-
- score=residue_pair_extended_list ( CL, s1,list1[0], s2,list2[0]);
-
- for (score2=0,a=1; a<n1; a++)
- for ( b=1; b<n2; b++)
- score2=MAX(residue_pair_extended_list ( CL, s1,list1[a], s2,list2[b]), score2);
-
- if ( n1==1 || n2==1);
- else score=MAX(score,score2);
-
- return score;
-}
-
-static int ** clean_residue_pair_hasch (int s1, int r1, int s2, int r2,int **hasch, Constraint_list *CL);
-int residue_pair_extended_list_raw ( Constraint_list *CL, int s1, int r1, int s2, int r2 )
- {
- double score=0;
-
-
-
- int a, t_s, t_r;
- static int **hasch;
- static int max_len;
- int field;
- double delta;
- /*
-
- function documentation: start
-
- int residue_pair_extended_list ( Constraint_list *CL, int s1, int r1, int s2, int r2);
-
- Computes the extended score for aligning residue seq1(r1) Vs seq2(r2)
- Computes: matrix_score
- non extended score
- extended score
-
- The extended score depends on the function index_res_constraint_list.
- This function can compare a sequence with itself.
-
- Associated functions: See util constraint list, list extention functions.
-
- function documentation: end
- */
-
- field=CL->weight_field;
- field=WE;
-
- if ( r1<=0 || r2<=0)return 0;
- if ( !hasch || max_len!=(CL->S)->max_len)
- {
- max_len=(CL->S)->max_len;
- if ( hasch) free_int ( hasch, -1);
- hasch=declare_int ( (CL->S)->nseq, (CL->S)->max_len+1);
- }
-
-
-
- /* Check matches for R1 in the indexed lib*/
- hasch[s1][r1]=FORBIDEN;
- for (a=1; a< CL->residue_index[s1][r1][0]; a+=ICHUNK)
- {
- t_s=CL->residue_index[s1][r1][a+SEQ2];
- t_r=CL->residue_index[s1][r1][a+R2];
- hasch[t_s][t_r]=CL->residue_index[s1][r1][a+field];
- }
-
- /*Check Matches for r1 <-> r2 in the indexed lib */
- for (a=1; a< CL->residue_index[s2][r2][0]; a+=ICHUNK)
- {
- t_s=CL->residue_index[s2][r2][a+SEQ2];
- t_r=CL->residue_index[s2][r2][a+R2];
-
-
- if (hasch[t_s][t_r])
- {
- if (hasch[t_s][t_r]==FORBIDEN)
- {
- score+=CL->residue_index[s2][r2][a+field];
- }
- else
- {
- delta=MIN(hasch[t_s][t_r],CL->residue_index[s2][r2][a+field]);
- score+=delta;
- }
- }
- }
-
- clean_residue_pair_hasch ( s1, r1,s2, r2, hasch, CL);
- return score;
- }
-int residue_pair_extended_list_4gp ( Constraint_list *CL, int s1, int r1, int s2, int r2 )
- {
- double score=0;
-
-
-
- int a, t_s, t_r;
- static int **hasch;
- static int max_len;
- int field;
- double delta;
-
- /*
-
- function documentation: start
-
- int residue_pair_extended_list ( Constraint_list *CL, int s1, int r1, int s2, int r2);
-
- Computes the extended score for aligning residue seq1(r1) Vs seq2(r2)
- Computes: matrix_score
- non extended score
- extended score
-
- The extended score depends on the function index_res_constraint_list.
- This function can compare a sequence with itself.
-
- Associated functions: See util constraint list, list extention functions.
-
- function documentation: end
- */
-
- field=CL->weight_field;
- field=WE;
-
- if ( r1<=0 || r2<=0)return 0;
- if ( !hasch || max_len!=(CL->S)->max_len)
- {
- max_len=(CL->S)->max_len;
- if ( hasch) free_int ( hasch, -1);
- hasch=declare_int ( (CL->S)->nseq, (CL->S)->max_len+1);
- }
-
-
-
- /* Check matches for R1 in the indexed lib*/
- hasch[s1][r1]=FORBIDEN;
- for (a=1; a< CL->residue_index[s1][r1][0]; a+=ICHUNK)
- {
- t_s=CL->residue_index[s1][r1][a+SEQ2];
- t_r=CL->residue_index[s1][r1][a+R2];
- hasch[t_s][t_r]=CL->residue_index[s1][r1][a+field];
- }
-
- /*Check Matches for r1 <-> r2 in the indexed lib */
- for (a=1; a< CL->residue_index[s2][r2][0]; a+=ICHUNK)
- {
- t_s=CL->residue_index[s2][r2][a+SEQ2];
- t_r=CL->residue_index[s2][r2][a+R2];
-
-
- if (hasch[t_s][t_r])
- {
- if (hasch[t_s][t_r]==FORBIDEN)
- {
- score+=((float)CL->residue_index[s2][r2][a+2]/NORM_F);
- }
- else
- {
- delta=MAX((((float)hasch[t_s][t_r]/NORM_F)),(((float)CL->residue_index[s2][r2][a+field]/NORM_F)));
- score+=delta;
- }
- }
- }
-
- clean_residue_pair_hasch ( s1, r1,s2, r2, hasch, CL);
- score/=(CL->S)->nseq;
- score *=NORM_F;
-
- return score;
- }
-
-int residue_pair_extended_list_pc ( Constraint_list *CL, int s1, int r1, int s2, int r2 )
- {
- //old norm does not take into account the number of effective intermediate sequences
-
- double score=0;
- int a, t_s, t_r;
- static int **hasch;
- int nclean;
- static int max_len;
- int field;
- double delta;
- float norm1=0;
- float norm2=0;
-
- /*
-
- function documentation: start
-
- int residue_pair_extended_list ( Constraint_list *CL, int s1, int r1, int s2, int r2);
-
- Computes the extended score for aligning residue seq1(r1) Vs seq2(r2)
- Computes: matrix_score
- non extended score
- extended score
-
- The extended score depends on the function index_res_constraint_list.
- This function can compare a sequence with itself.
-
- Associated functions: See util constraint list, list extention functions.
-
- function documentation: end
- */
-
- field=CL->weight_field;
- field=WE;
-
- if ( r1<=0 || r2<=0)return 0;
- if ( !hasch || max_len!=(CL->S)->max_len)
- {
- max_len=(CL->S)->max_len;
- if ( hasch) free_int ( hasch, -1);
- hasch=declare_int ( (CL->S)->nseq, (CL->S)->max_len+1);
-
- }
-
-
- /* Check matches for R1 in the indexed lib*/
- hasch[s1][r1]=FORBIDEN;
- for (a=1; a< CL->residue_index[s1][r1][0]; a+=ICHUNK)
- {
- t_s=CL->residue_index[s1][r1][a+SEQ2];
- t_r=CL->residue_index[s1][r1][a+R2];
- hasch[t_s][t_r]=CL->residue_index[s1][r1][a+field];
- norm1++;
-
- }
-
-
- /*Check Matches for r1 <-> r2 in the indexed lib */
- for (a=1; a< CL->residue_index[s2][r2][0]; a+=ICHUNK)
- {
- t_s=CL->residue_index[s2][r2][a+SEQ2];
- t_r=CL->residue_index[s2][r2][a+R2];
- norm2++;
-
- if (hasch[t_s][t_r])
- {
- if (hasch[t_s][t_r]==FORBIDEN)
- {
- score+=((float)CL->residue_index[s2][r2][a+field]/NORM_F);
- }
- else
- {
- delta=MIN((((float)hasch[t_s][t_r]/NORM_F)),(((float)CL->residue_index[s2][r2][a+field]/NORM_F)));
- score+=delta;
- }
- }
- }
-
- clean_residue_pair_hasch ( s1, r1,s2, r2, hasch, CL);
-
- //New Normalization
- norm1=MIN(norm1,norm2);
-
- //Old Normailzation: on the number of sequences, useless when not doiang an all against all
- //norm1=(CL->S)->nseq;
-
- score=(norm1)?score/norm1:0;
-
-
- return score*NORM_F;
- }
-int residue_pair_extended_list_pc_old ( Constraint_list *CL, int s1, int r1, int s2, int r2 )
- {
- //old norm does not take into account the number of effective intermediate sequences
-
- double score=0;
- int a, t_s, t_r;
- static int **hasch;
-
- static int max_len;
- int field;
- double delta;
-
- /*
-
- function documentation: start
-
- int residue_pair_extended_list ( Constraint_list *CL, int s1, int r1, int s2, int r2);
-
- Computes the extended score for aligning residue seq1(r1) Vs seq2(r2)
- Computes: matrix_score
- non extended score
- extended score
-
- The extended score depends on the function index_res_constraint_list.
- This function can compare a sequence with itself.
-
- Associated functions: See util constraint list, list extention functions.
-
- function documentation: end
- */
-
- field=CL->weight_field;
- field=WE;
-
- if ( r1<=0 || r2<=0)return 0;
- if ( !hasch || max_len!=(CL->S)->max_len)
- {
- max_len=(CL->S)->max_len;
- if ( hasch) free_int ( hasch, -1);
- hasch=declare_int ( (CL->S)->nseq, (CL->S)->max_len+1);
- }
-
-
- /* Check matches for R1 in the indexed lib*/
- hasch[s1][r1]=FORBIDEN;
- for (a=1; a< CL->residue_index[s1][r1][0]; a+=ICHUNK)
- {
- t_s=CL->residue_index[s1][r1][a+SEQ2];
- t_r=CL->residue_index[s1][r1][a+R2];
- hasch[t_s][t_r]=CL->residue_index[s1][r1][a+field];
- }
-
-
- /*Check Matches for r1 <-> r2 in the indexed lib */
- for (a=1; a< CL->residue_index[s2][r2][0]; a+=ICHUNK)
- {
- t_s=CL->residue_index[s2][r2][a+SEQ2];
- t_r=CL->residue_index[s2][r2][a+R2];
-
-
- if (hasch[t_s][t_r])
- {
- if (hasch[t_s][t_r]==FORBIDEN)
- {
- score+=((float)CL->residue_index[s2][r2][a+2]/NORM_F);
- }
- else
- {
- delta=MIN((((float)hasch[t_s][t_r]/NORM_F)),(((float)CL->residue_index[s2][r2][a+field]/NORM_F)));
- score+=delta;
- }
- }
- }
-
- clean_residue_pair_hasch ( s1, r1,s2, r2, hasch, CL);
- score/=(CL->S)->nseq;
- return score*NORM_F;
- }
-
-
-int residue_pair_extended_list ( Constraint_list *CL, int s1, int r1, int s2, int r2 )
- {
- double score=0;
- double max_score=0;
- double max_val=0;
-
- int a, t_s, t_r;
- static int **hasch;
- static int max_len;
- int field;
-
- /*
- new function: self normalized
- function documentation: start
-
- int residue_pair_extended_list ( Constraint_list *CL, int s1, int r1, int s2, int r2);
-
- Computes the extended score for aligning residue seq1(r1) Vs seq2(r2)
- Computes: matrix_score
- non extended score
- extended score
-
- The extended score depends on the function index_res_constraint_list.
- This function can compare a sequence with itself.
-
- Associated functions: See util constraint list, list extention functions.
-
- function documentation: end
- */
-
- field=CL->weight_field;
- field=WE;
-
- if ( r1<=0 || r2<=0)return 0;
- if ( !hasch || max_len!=(CL->S)->max_len)
- {
- max_len=(CL->S)->max_len;
- if ( hasch) free_int ( hasch, -1);
- hasch=declare_int ( (CL->S)->nseq, (CL->S)->max_len+1);
- }
-
-
-
- /* Check matches for R1 in the indexed lib*/
- hasch[s1][r1]=FORBIDEN;
- for (a=1; a< CL->residue_index[s1][r1][0]; a+=ICHUNK)
- {
- t_s=CL->residue_index[s1][r1][a+SEQ2];
- t_r=CL->residue_index[s1][r1][a+R2];
- hasch[t_s][t_r]=CL->residue_index[s1][r1][a+field];
- max_score+=CL->residue_index[s1][r1][a+field];
- }
-
- /*Check Matches for r1 <-> r2 in the indexed lib */
- for (a=1; a< CL->residue_index[s2][r2][0]; a+=ICHUNK)
- {
- t_s=CL->residue_index[s2][r2][a+SEQ2];
- t_r=CL->residue_index[s2][r2][a+R2];
-
-
- if (hasch[t_s][t_r])
- {
- if (hasch[t_s][t_r]==FORBIDEN)
- {
- score+=CL->residue_index[s2][r2][a+field];
- max_score+=CL->residue_index[s2][r2][a+field];
- }
- else
- {
- double delta;
- delta=MIN(hasch[t_s][t_r],CL->residue_index[s2][r2][a+field]);
-
- score+=delta;
- max_score-=hasch[t_s][t_r];
- max_score+=delta;
- max_val=MAX(max_val,delta);
- }
- }
- else
- {
- max_score+=CL->residue_index[s2][r2][a+field];
- }
- }
-
- max_score-=hasch[s2][r2];
- clean_residue_pair_hasch ( s1, r1,s2, r2, hasch, CL);
-
-
- if ( max_score==0)score=0;
- else if ( CL->normalise)
- {
- score=((score*CL->normalise)/max_score)*SCORE_K;
- if (max_val> CL->normalise)
- {
- score*=max_val/(double)CL->normalise;
- }
- }
- return (int) score;
- }
-int ** clean_residue_pair_hasch (int s1, int r1, int s2, int r2,int **hasch, Constraint_list *CL)
- {
- int a, t_s, t_r;
- if ( !hasch) return hasch;
-
- for (a=1; a< CL->residue_index[s1][r1][0]; a+=ICHUNK)
- {
- t_s=CL->residue_index[s1][r1][a+SEQ2];
- t_r=CL->residue_index[s1][r1][a+R2];
- hasch[t_s][t_r]=0;
- }
- hasch[s1][r1]=hasch[s2][r2]=0;
- return hasch;
- }
-
-int residue_pair_test_function ( Constraint_list *CL, int s1, int r1, int s2, int r2 )
- {
- double score=0;
-
- int a, t_s, t_r;
- static int **hasch;
- static int max_len;
- int cons1;
- int cons2;
-
- int field;
- /*
- function documentation: start
-
- int residue_pair_extended_list ( Constraint_list *CL, int s1, int r1, int s2, int r2);
-
- Computes the extended score for aligning residue seq1(r1) Vs seq2(r2)
- Computes: matrix_score
- non extended score
- extended score
-
- The extended score depends on the function index_res_constraint_list.
- This function can compare a sequence with itself.
-
- Associated functions: See util constraint list, list extention functions.
-
- function documentation: end
- */
-
-
- CL->weight_field=WE;
- field=CL->weight_field;
-
-
- if ( r1<=0 || r2<=0)return 0;
- if ( !hasch || max_len!=(CL->S)->max_len)
- {
- max_len=(CL->S)->max_len;
- if ( hasch) free_int ( hasch, -1);
- hasch=declare_int ( (CL->S)->nseq, (CL->S)->max_len+1);
- }
-
-
-
- hasch[s1][r1]=1000;
- for (a=1; a< CL->residue_index[s1][r1][0]; a+=ICHUNK)
- {
- t_s=CL->residue_index[s1][r1][a+SEQ2];
- t_r=CL->residue_index[s1][r1][a+R2];
- hasch[t_s][t_r]=CL->residue_index[s1][r1][a+field];
- }
-
-
- for (a=1; a< CL->residue_index[s2][r2][0]; a+=ICHUNK)
- {
- t_s=CL->residue_index[s2][r2][a+SEQ2];
- t_r=CL->residue_index[s2][r2][a+R2];
- if (hasch[t_s][t_r])
- {
- cons1=hasch[t_s][t_r];
- cons2=CL->residue_index[s2][r2][a+field];
- score +=MIN(cons1,cons2);
- }
- }
-
-
- score=(CL->normalise)?((score*CL->normalise)/CL->max_ext_value):score;
-
- for (a=1; a< CL->residue_index[s1][r1][0]; a+=ICHUNK)
- {
- t_s=CL->residue_index[s1][r1][a+SEQ2];
- t_r=CL->residue_index[s1][r1][a+R2];
- hasch[t_s][t_r]=0;
- }
- hasch[s1][r1]=hasch[s2][r2]=0;
-
-
- return (int)(score*SCORE_K);
- }
-
-int residue_pair_relative_extended_list ( Constraint_list *CL, int s1, int r1, int s2, int r2 )
- {
- int a, t_s, t_r;
- static int **hasch;
- static int max_len;
- int score=0;
- int total_score=0;
- int field;
- /*
- function documentation: start
-
- int residue_pair_extended_list ( Constraint_list *CL, int s1, int r1, int s2, int r2);
-
- Computes the extended score for aligning residue seq1(r1) Vs seq2(r2)
- Computes: matrix_score
- non extended score
- extended score
-
- The extended score depends on the function index_res_constraint_list.
- This function can compare a sequence with itself.
-
- Associated functions: See util constraint list, list extention functions.
-
- function documentation: end
- */
-
-
-
- field=CL->weight_field;
- field=WE;
-
- if ( r1<=0 || r2<=0)return 0;
- if ( !hasch || max_len!=(CL->S)->max_len)
- {
- max_len=(CL->S)->max_len;
- if ( hasch) free_int ( hasch, -1);
- hasch=declare_int ( (CL->S)->nseq, (CL->S)->max_len+1);
- }
-
-
-
- hasch[s1][r1]=100000;
- for (a=1; a< CL->residue_index[s1][r1][0]; a+=ICHUNK)
- {
- t_s=CL->residue_index[s1][r1][a+SEQ2];
- t_r=CL->residue_index[s1][r1][a+R2];
- hasch[t_s][t_r]=CL->residue_index[s1][r1][a+field];
- total_score+=CL->residue_index[s1][r1][a+field];
- }
-
-
- for (a=1; a< CL->residue_index[s2][r2][0]; a+=ICHUNK)
- {
- t_s=CL->residue_index[s2][r2][a+SEQ2];
- t_r=CL->residue_index[s2][r2][a+R2];
- total_score+=CL->residue_index[s1][r1][a+field];
- if (hasch[t_s][t_r])
- {
- if (field==WE){score+=2*MIN(hasch[t_s][t_r],CL->residue_index[s2][r2][a+field]);}
- }
- }
-
- score=((CL->normalise*score)/total_score);
-
-
- for (a=1; a< CL->residue_index[s1][r1][0]; a+=ICHUNK)
- {
- t_s=CL->residue_index[s1][r1][a+SEQ2];
- t_r=CL->residue_index[s1][r1][a+R2];
- hasch[t_s][t_r]=0;
- }
- hasch[s1][r1]=hasch[s2][r2]=0;
-
- return score*SCORE_K;
- }
-int residue_pair_extended_list_g_coffee_quadruplet ( Constraint_list *CL, int s1, int r1, int s2, int r2 )
-{
- int t_s, t_r, t_w, q_s, q_r, q_w;
- int a, b;
- static int **hasch;
- int score=0, s=0;
-
- int field;
- /* This measure the quadruplets cost on a pair of residues*/
-
- field=CL->weight_field;
-
- if ( r1<=0 || r2<=0)return 0;
- if ( !hasch)
- {
- hasch=vcalloc ( (CL->S)->nseq, sizeof (int*));
- for ( a=0; a< (CL->S)->nseq; a++)hasch[a]=vcalloc ( (CL->S)->len[a]+1, sizeof (int));
- }
-
-
- hasch[s1][r1]=100000;
- for (a=1; a< CL->residue_index[s1][r1][0]; a+=ICHUNK)
- {
- t_s=CL->residue_index[s1][r1][a+SEQ2];
- t_r=CL->residue_index[s1][r1][a+R2];
- t_w=CL->residue_index[s1][r1][a+field];
- if ( CL->seq_for_quadruplet[t_s])
- {
- for ( b=1; b<CL->residue_index[t_s][t_r][0]; b+=ICHUNK)
- {
- q_s=CL->residue_index[t_s][t_r][b+SEQ2];
- q_r=CL->residue_index[t_s][t_r][b+R2];
- if (CL-> seq_for_quadruplet[q_s])
- {
-
- hasch[q_s][q_r]=MIN(CL->residue_index[t_s][t_r][b+2],t_w);
- }
- }
- }
- }
-
-
- for (s=0,score=0,a=1; a< CL->residue_index[s2][r2][0]; a+=ICHUNK)
- {
- t_s=CL->residue_index[s2][r2][a+SEQ2];
- t_r=CL->residue_index[s2][r2][a+R2];
- t_w=CL->residue_index[s2][r2][a+field];
- if ( CL->seq_for_quadruplet[t_s])
- {
- for ( b=1; b<CL->residue_index[t_s][t_r][0]; b+=ICHUNK)
- {
- q_s=CL->residue_index[t_s][t_r][b+SEQ2];
- q_r=CL->residue_index[t_s][t_r][b+R2];
- q_w=CL->residue_index[t_s][t_r][b+field];
- if (hasch[q_s][q_r] && CL->seq_for_quadruplet[q_s])
- s=MIN(hasch[q_s][q_r],MIN(CL->residue_index[t_s][t_r][b+2],q_w));
- score=MAX(score, s);
- }
- }
- }
-
- for (a=1; a< CL->residue_index[s1][r1][0]; a+=ICHUNK)
- {
- t_s=CL->residue_index[s1][r1][a+SEQ2];
- t_r=CL->residue_index[s1][r1][a+R2];
- t_w=CL->residue_index[s1][r1][a+field];
- if ( CL->seq_for_quadruplet[t_s])
- {
- for ( b=1; b<CL->residue_index[t_s][t_r][0]; b+=ICHUNK)
- {
- q_s=CL->residue_index[t_s][t_r][b+SEQ2];
- q_r=CL->residue_index[t_s][t_r][b+R2];
- hasch[q_s][q_r]=0;
- }
- }
- }
-
- return score*SCORE_K;
- }
-int residue_pair_extended_list_g_coffee ( Constraint_list *CL, int s1, int r1, int s2, int r2 )
- {
- int a, t_s, t_r;
- static int **hasch;
- int score=0,s;
-
- int field;
- /*
- function documentation: start
-
- int residue_pair_extended_list ( Constraint_list *CL, int s1, int r1, int s2, int r2);
-
- Computes the extended score for aligning residue seq1(r1) Vs seq2(r2)
- Computes: matrix_score
- non extended score
- extended score
-
- The extended score depends on the function index_res_constraint_list.
- This function can compare a sequence with itself.
-
- Associated functions: See util constraint list, list extention functions.
-
- function documentation: end
- */
-
- field=CL->weight_field;
-
- if ( r1<=0 || r2<=0)return 0;
- if ( !hasch)
- {
- hasch=vcalloc ( (CL->S)->nseq, sizeof (int*));
- for ( a=0; a< (CL->S)->nseq; a++)hasch[a]=vcalloc ( (CL->S)->len[a]+1, sizeof (int));
- }
-
-
-
- hasch[s1][r1]=100000;
- for (a=1; a< CL->residue_index[s1][r1][0]; a+=ICHUNK)
- {
- t_s=CL->residue_index[s1][r1][a+SEQ2];
- t_r=CL->residue_index[s1][r1][a+R2];
- hasch[t_s][t_r]=CL->residue_index[s1][r1][a+field];
- }
-
-
- for (s=0, score=0,a=1; a< CL->residue_index[s2][r2][0]; a+=ICHUNK)
- {
- t_s=CL->residue_index[s2][r2][a+SEQ2];
- t_r=CL->residue_index[s2][r2][a+R2];
-
- if (hasch[t_s][t_r])
- {
- if (field==WE)
- {s=MIN(hasch[t_s][t_r],CL->residue_index[s2][r2][a+WE]);
- score=MAX(s,score);
- }
- }
- }
-
- for (a=1; a< CL->residue_index[s1][r1][0]; a+=ICHUNK)
- {
- t_s=CL->residue_index[s1][r1][a+SEQ2];
- t_r=CL->residue_index[s1][r1][a+R2];
- hasch[t_s][t_r]=0;
- }
- hasch[s1][r1]=hasch[s2][r2]=0;
-
- return score*SCORE_K;
- }
-
-int extend_residue_pair ( Constraint_list *CL, int s1, int r1, int s2, int r2)
- {
- double score=0;
-
- int a, t_s, t_r, p;
- static int **hasch;
-
- static int *entry;
- int *r;
- int field;
-
-
-
- /*
- This is the generic Function->works with everything
- should be gradually phased out
-
-
- int extend_residue_pair ( Constraint_list *CL, int s1, int r1, int s2, int r2, int field )
-
- Computes the extended score for aligning residue seq1(r1) Vs seq2(r2)
- Computes: matrix_score
- non extended score
- extended score
-
- The extended score depends on the function index_res_constraint_list.
- This function can compare a sequence with itself.
-
- Associated functions: See util constraint list, list extention functions.
- function documentation: end
- */
-
-
- field=CL->weight_field;
-
- if ( r1<=0 || r2<=0)return 0;
- else if ( !CL->residue_index && CL->M)
- {
- return evaluate_matrix_score (CL, s1,r1, s2, r2);
- }
-
- else if ( !CL->extend_jit)
- {
- if ( !entry) entry=vcalloc (LIST_N_FIELDS , sizeof (int));
- entry[SEQ1]=s1;
- entry[SEQ2]=s2;
- entry[R1]=r1;
- entry[R2]=r2;
- r=main_search_in_list_constraint( entry,&p,4,CL);
- if (r==NULL)return 0;
- else return r[field];
- }
- else
- {
- if ( !hasch)
- {
- hasch=vcalloc ( (CL->S)->nseq, sizeof (int*));
- for ( a=0; a< (CL->S)->nseq; a++)hasch[a]=vcalloc ( (CL->S)->len[a]+1, sizeof (int));
- }
-
-
-
- hasch[s1][r1]=100000;
- for (a=1; a< CL->residue_index[s1][r1][0]; a+=ICHUNK)
- {
- t_s=CL->residue_index[s1][r1][a+SEQ2];
- t_r=CL->residue_index[s1][r1][a+R2];
- hasch[t_s][t_r]=CL->residue_index[s1][r1][a+WE];
- }
-
-
- for (a=1; a< CL->residue_index[s2][r2][0]; a+=ICHUNK)
- {
- t_s=CL->residue_index[s2][r2][a+SEQ2];
- t_r=CL->residue_index[s2][r2][a+R2];
- if (hasch[t_s][t_r])
- {
- if (field==WE)score+=MIN(hasch[t_s][t_r],CL->residue_index[s2][r2][a+WE] );
-
- }
- }
- score=(CL->normalise)?((score*CL->normalise)/CL->max_ext_value):score;
- for (a=1; a< CL->residue_index[s1][r1][0]; a+=ICHUNK)
- {
- t_s=CL->residue_index[s1][r1][a+SEQ2];
- t_r=CL->residue_index[s1][r1][a+R2];
- hasch[t_s][t_r]=0;
- }
- hasch[s1][r1]=hasch[s2][r2]=0;
-
- return (int)(score*SCORE_K);
- }
- }
-/*********************************************************************************************/
-/* */
-/* FUNCTIONS FOR GETTING THE PW COST : CL->get_dp_cost */
-/* */
-/*********************************************************************************************/
-int get_dp_cost_blosum_matrix (Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL)
-{
- int s1, r1, s2, r2;
- static int **matrix;
-
- if (!matrix) matrix=read_matrice ("blosum62mt");
- s1=A->order[list1[0]][0];
- s2=A->order[list2[0]][0];
- r1=pos1[list1[0]][col1];
- r2=pos2[list2[0]][col2];
-
- /*dp cost function: works only with two sequences*/
-
- if ( seq2R_template_profile (CL->S,s1) ||seq2R_template_profile (CL->S,s2))
- return evaluate_aln_profile_score ( CL, s1,r1, s2, r2) -CL->nomatch*SCORE_K;
- else if (r1>0 && r2>0)
- {
- r1--;
- r2--;
-
-
- return matrix [(CL->S)->seq[s1][r1]-'A'][(CL->S)->seq[s2][r2]-'A']*SCORE_K -CL->nomatch*SCORE_K;
-
- }
- else
- return -CL->nomatch*SCORE_K ;
-}
-int get_dp_cost_pam_matrix (Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL)
-{
- int s1, r1, s2, r2;
- static int **matrix;
-
- if (!matrix) matrix=read_matrice ("pam250mt");
- s1=A->order[list1[0]][0];
- s2=A->order[list2[0]][0];
- r1=pos1[list1[0]][col1];
- r2=pos2[list2[0]][col2];
-
- /*dp cost function: works only with two sequences*/
-
-
- if ( seq2R_template_profile (CL->S,s1) ||seq2R_template_profile (CL->S,s2))
- return evaluate_aln_profile_score ( CL, s1,r1, s2, r2) -CL->nomatch*SCORE_K;
- else if (r1>0 && r2>0)
- {
- r1--;
- r2--;
-
-
- return matrix [(CL->S)->seq[s1][r1]-'A'][(CL->S)->seq[s2][r2]-'A']*SCORE_K -CL->nomatch*SCORE_K;
-
- }
- else
- return -CL->nomatch*SCORE_K ;
-}
-
-int get_dp_cost_pw_matrix (Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL)
-{
- int s1, r1, s2, r2;
-
- s1=A->order[list1[0]][0];
- s2=A->order[list2[0]][0];
- r1=pos1[list1[0]][col1];
- r2=pos2[list2[0]][col2];
-
- /*dp cost function: works only with two sequences*/
- if ( seq2R_template_profile (CL->S,s1) ||seq2R_template_profile (CL->S,s2))
- return evaluate_aln_profile_score ( CL, s1,r1, s2, r2) -CL->nomatch*SCORE_K;
- else if (r1>0 && r2>0)
- {
- r1--;
- r2--;
-
-
- return CL->M[(CL->S)->seq[s1][r1]-'A'][(CL->S)->seq[s2][r2]-'A']*SCORE_K -CL->nomatch*SCORE_K;
-
- }
- else
- return -CL->nomatch*SCORE_K ;
-}
-
-/*********************************************************************************************/
-/* */
-/* FUNCTIONS FOR GETTING THE COST : CL->get_dp_cost */
-/* */
-/*********************************************************************************************/
-
-
-
-int get_cdna_best_frame_dp_cost (Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL)
- {
- int a, b;
- int n=4;
- int s;
- char a1, a2;
- static int l1, l2;
- static Alignment *B;
- static int **score;
-
- if ( !score)score=declare_int(3, 2);
-
- if (!A)
- {
- free_aln(B);
- B=NULL;
- return UNDEFINED;
- }
- if (!B)
- {
- if (ns1+ns2>2){fprintf ( stderr, "\nERROR: get_cdna_dp_cost mode is only for pair-wise ALN [FATAL]\n");crash("");}
- free_aln (B);
- B=copy_aln (A, NULL);
-
- l1=(int)strlen ( A->seq_al[list1[0]]);
- for ( b=0; b<l1; b++)
- B->seq_al[list1[0]][b]=translate_dna_codon (A->seq_al[list1[0]]+b, 'x');
- l2=(int)strlen ( A->seq_al[list2[0]]);
- for ( b=0; b<l2; b++)
- B->seq_al[list2[0]][b]=translate_dna_codon (A->seq_al[list2[0]]+b, 'x');
- }
-
-/*Set the frame*/
-
- for ( a=0; a< 3; a++)score[a][0]=score[a][1]=0;
- for ( a=col1-(n*3),b=col2-(n*3); a<col1+(n*3) ; a++, b++)
- {
- if ( a<0 || b<0 || a>=l1 || b>=l2)continue;
-
- a1=tolower(B->seq_al[list1[0]][a]);
- a2=tolower(B->seq_al[list2[0]][b]);
-
- score[a%3][0]+=(a1=='x' || a2=='x')?0:CL->M[a1-'A'][a2-'A'];
- score[a%3][1]++;
- }
-
- for ( a=0; a< 3; a++)score[a][0]=(score[a][1]>0)?(score[a][0]/score[a][1]):0;
- if ( score[0][0]>score[1][0] && score[0][0]>score[2][0])
- s=score[0][0];
- else if ( score[1][0]>score[0][0] && score[1][0]>score[2][0])
- s=score[1][0];
- else s=score[2][0];
-
- return s*SCORE_K;
-
- }
-
-int get_dp_cost_quadruplet ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL)
- {
- int score;
-
-
- if ( ns1==1 || ns2==1)
- score=slow_get_dp_cost ( A, pos1, ns1, list1,col1, pos2, ns2, list2, col2, CL);
- else
- score=fast_get_dp_cost_quadruplet ( A, pos1, ns1, list1,col1, pos2, ns2, list2, col2, CL);
-
- return (score==UNDEFINED)?UNDEFINED:(score-SCORE_K*CL->nomatch);
- }
-int get_dp_cost ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL)
- {
- int MODE=0;
- int score;
-
-
-
- if (A==NULL)return 0;
-
- if (MODE!=2 || MODE==0 || (!CL->residue_index && CL->M) || (!CL->residue_index && CL->T)|| ns1==1 || ns2==1)
- score=slow_get_dp_cost ( A, pos1, ns1, list1,col1, pos2, ns2, list2, col2, CL);
- else if (MODE==1 || MODE==2)
- score=fast_get_dp_cost ( A, pos1, ns1, list1,col1, pos2, ns2, list2, col2, CL);
- else
- score=0;
-
-
-
- return (score==UNDEFINED)?UNDEFINED:(score-SCORE_K*CL->nomatch);
- }
-int ***make_cw_lu (int **cons, int l, Constraint_list *CL);
-int ***make_cw_lu (int **cons, int l, Constraint_list *CL)
-{
- int ***lu;
- int p, a,r;
-
- lu=declare_arrayN(3, sizeof (int),l,26, 2);
- for ( p=0; p<l ; p++)
- {
- for (r=0; r<26; r++)
- {
- for (a=3; a<cons[p][1]; a+=3)
- {
- lu[p][r][0]+=cons[p][a+1]*CL->M[r][cons[p][a]-'A'];
- lu[p][r][1]+=cons[p][a+1];
- }
- }
- }
- return lu;
-}
-
-int cw_profile_get_dp_cost ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL)
-{
- static int last_tag;
- static int *pr, ***lu;
- int score;
- static int *list[2], ns[2], **cons[2], ref;
- int eva_col,ref_col, a, p, r;
- float t1, t2;
-
-
-
-
- if (last_tag!=A->random_tag)
- {
- int n1, n2;
-
- last_tag=A->random_tag;
- list[0]=list1;ns[0]=ns1;
- list[1]=list2;ns[1]=ns2;
- free_int (cons[0],-1);free_int (cons[1],-1);free_arrayN((void*)lu,3);
- cons[0]=NULL;cons[1]=NULL;lu=NULL;
-
- n1=sub_aln2nseq_prf (A, ns[0], list[0]);
- n2=sub_aln2nseq_prf (A, ns[1], list[1]);
- if ( n1>1 || n2>1)
- {
- cons[0]=sub_aln2count_mat2 (A, ns[0], list[0]);
- cons[1]=sub_aln2count_mat2 (A, ns[1], list[1]);
- ref=(ns[0]>ns[1])?0:1;
- lu=make_cw_lu(cons[ref],(int)strlen(A->seq_al[list[ref][0]]), CL);
- }
- }
-
- if (!lu)
- {
- char r1, r2;
- r1=A->seq_al[list1[0]][col1];
- r2=A->seq_al[list2[0]][col2];
- if ( r1!='-' && r2!='-')
- return CL->M[r1-'A'][r2-'A']*SCORE_K -SCORE_K*CL->nomatch;
- else
- return -SCORE_K*CL->nomatch;
- }
- else
- {
- eva_col= (ref==0)?col2:col1;
- ref_col= (ref==0)?col1:col2;
- pr=cons[1-ref][eva_col];
- t1=t2=0;
- for (a=3; a< pr[1]; a+=3)
- {
- r=tolower(pr[a]);
- p= pr[a+1];
-
- t1+=lu[ref_col][r-'a'][0]*p;
- t2+=lu[ref_col][r-'a'][1]*p;
- }
- score=(t2==0)?0:(t1*SCORE_K)/t2;
- score -=SCORE_K*CL->nomatch;
- return score;
- }
-}
-int cw_profile_get_dp_cost_old ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL)
-{
- static int last_tag;
- static int **cons1, **cons2;
- int score;
-
-
- if (last_tag!=A->random_tag)
- {
- last_tag=A->random_tag;
- free_int (cons1,-1);free_int (cons2,-1);
- cons1=sub_aln2count_mat2 (A, ns1, list1);
- cons2=sub_aln2count_mat2 (A, ns2, list2);
- }
- score=cw_profile_profile (cons1[col1], cons2[col2], CL)-SCORE_K*CL->nomatch;
- return score;
-}
-
-int cw_profile_get_dp_cost_window ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL)
-{
- static int last_tag;
- static int **cons1, **cons2;
- int a, score, window_size=5, n, p1, p2;
-
-
- if (last_tag!=A->random_tag)
- {
- last_tag=A->random_tag;
- free_int (cons1,-1);free_int (cons2,-1);
- cons1=sub_aln2count_mat2 (A, ns1, list1);
- cons2=sub_aln2count_mat2 (A, ns2, list2);
- }
-
- for (n=0,score=0,a=0; a<window_size; a++)
- {
- p1=col1-a;
- p2=col2-a;
- if ( p1<0 || cons1[p1][0]==END_ARRAY)continue;
- if ( p2<0 || cons2[p2][0]==END_ARRAY)continue;
- score+=cw_profile_profile (cons1[col1], cons2[col2], CL)-SCORE_K*CL->nomatch;
- n++;
- }
- if (n>0)score/=n;
-
- return score;
- }
-
-int consensus_get_dp_cost ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL)
- {
- static int last_tag;
- static char *seq1, *seq2;
-
-
- /*Works only with matrix*/
- if (last_tag !=A->random_tag)
- {
- int ls1, ls2, lenls1, lenls2;
-
- last_tag=A->random_tag;
- vfree (seq1);vfree (seq2);
- seq1=sub_aln2cons_seq_mat (A, ns1, list1, "blosum62mt");
- seq2=sub_aln2cons_seq_mat (A, ns2, list2, "blosum62mt");
- ls1=list1[ns1-1];ls2=list2[ns2-1];
- lenls1=(int)strlen (A->seq_al[ls1]); lenls2=(int)strlen (A->seq_al[ls2]);
- }
-
- return (CL->M[seq1[col1]-'A'][seq2[col2]-'A']*SCORE_K)-SCORE_K*CL->nomatch;
- }
-
-int fast_get_dp_cost_quadruplet ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL)
- {
- /*WARNING: WORKS ONLY WITH List to Extend*/
- /*That function does a quruple extension beween two columns by pooling the residues together*/
-
- double score=0;
-
- int a,b, c;
- int n_gap1=0;
- int n_gap2=0;
-
- int s1, rs1, r1, t_r, t_s,t_w, q_r, q_s, q_w, s2, rs2, r2;
- int **buf_pos, buf_ns, *buf_list, buf_col;
-
- static int **hasch1;
- static int **hasch2;
-
- static int **n_hasch1;
- static int **n_hasch2;
-
- static int **is_in_col1;
- static int **is_in_col2;
-
-
- if (ns2>ns1)
- {
- buf_pos=pos1;
- buf_ns=ns1;
- buf_list=list1;
- buf_col=col1;
-
- pos1=pos2;
- ns1=ns2;
- list1=list2;
- col1=col2;
-
- pos2=buf_pos;
- ns2=buf_ns;
- list2=buf_list;
- col2=buf_col;
- }
-
-
- if ( !hasch1)
- {
-
- hasch1=declare_int( (CL->S)->nseq, (CL->S)->max_len+1);
- hasch2=declare_int( (CL->S)->nseq, (CL->S)->max_len+1);
- n_hasch1=declare_int ( (CL->S)->nseq, (CL->S)->max_len+1);
- n_hasch2=declare_int( (CL->S)->nseq, (CL->S)->max_len+1);
- is_in_col1=declare_int( (CL->S)->nseq, (CL->S)->max_len+1);
- is_in_col2=declare_int( (CL->S)->nseq, (CL->S)->max_len+1);
- }
-
- for ( a=0; a< ns1; a++)
- {
- rs1= list1[a];
- s1=A->order[rs1][0];
- r1=pos1[rs1][col1];
-
- if (r1<0)n_gap1++;
- else
- {
- is_in_col1[s1][r1]=1;
- for (b=1; b< CL->residue_index[s1][r1][0]; b+=ICHUNK)
- {
- t_s=CL->residue_index[s1][r1][b+SEQ2];
- t_r=CL->residue_index[s1][r1][b+R2];
- t_w=CL->residue_index[s1][r1][b+WE];
- for ( c=1; c< CL->residue_index[t_s][t_r][0]; c+=ICHUNK)
- {
- q_s=CL->residue_index[t_s][t_r][c+SEQ2];
- q_r=CL->residue_index[t_s][t_r][c+R2];
- q_w=CL->residue_index[t_s][t_r][c+WE];
- hasch1[q_s][q_r]+=MIN(q_w, t_w);
- n_hasch1[q_s][q_r]++;
- }
- }
- }
- }
-
- for ( a=0; a< ns2; a++)
- {
- rs2=list2[a];
- s2=A->order[rs2][0];
- r2=pos2[rs2][col2];
-
- if (r2<0)n_gap2++;
- else
- {
- is_in_col2[s2][r2]=1;
- for (b=1; b< CL->residue_index[s2][r2][0]; b+=ICHUNK)
- {
- t_s=CL->residue_index[s2][r2][b+SEQ2];
- t_r=CL->residue_index[s2][r2][b+R2];
- t_w=CL->residue_index[s2][r2][b+WE];
- for ( c=1; c< CL->residue_index[t_s][t_r][0]; c+=ICHUNK)
- {
- q_s=CL->residue_index[t_s][t_r][c+SEQ2];
- q_r=CL->residue_index[t_s][t_r][c+R2];
- q_w=CL->residue_index[t_s][t_r][c+WE];
- hasch2[q_s][q_r]+=MIN(t_w, q_w);
- n_hasch2[q_s][q_r]++;
- }
- }
- }
- }
-
-
- for ( a=0; a< ns2; a++)
- {
- rs2=list2[a];
- s2=A->order[rs2][0];
- r2=pos1[rs2][col2];
-
- if (r2<0);
- else
- {
- for (b=1; b< CL->residue_index[s2][r2][0]; b+=ICHUNK)
- {
- t_s=CL->residue_index[s2][r2][b+SEQ2];
- t_r=CL->residue_index[s2][r2][b+R2];
-
- for ( c=1; c< CL->residue_index[t_s][t_r][0]; c+=ICHUNK)
- {
- q_s=CL->residue_index[t_s][t_r][c+SEQ2];
- q_r=CL->residue_index[t_s][t_r][c+R2];
- if ( hasch2[q_s][q_r] && hasch1[q_s][q_r]&& !(is_in_col1[q_s][q_r] || is_in_col2[q_s][q_r]))
- {
- score+=MIN(hasch2[q_s][q_r]*(n_hasch1[q_s][q_r]),hasch1[q_s][q_r]*(n_hasch2[q_s][q_r]));
- }
- else if ( hasch2[q_s][q_r] && is_in_col1[q_s][q_r])
- {
- score+=hasch2[q_s][q_r]*(n_hasch1[q_s][q_r]+1);
- }
- else if (hasch1[q_s][q_r] && is_in_col2[q_s][q_r])
- {
- score+=hasch1[q_s][q_r]*(n_hasch2[q_s][q_r]+1);
- }
- hasch2[q_s][q_r]=0;
- n_hasch2[q_s][q_r]=0;
- }
- }
- hasch2[s2][r2]=0;
- is_in_col2[s2][r2]=0;
- }
- }
-
-
- for ( a=0; a< ns1; a++)
- {
- rs1= list1[a];
- s1=A->order[rs1][0];
- r1=pos1[rs1][col1];
-
- if (r1<0);
- else
- {
- is_in_col1[s1][r1]=0;
- hasch1[s1][r1]=0;
- for (b=1; b< CL->residue_index[s1][r1][0]; b+=ICHUNK)
- {
- t_s=CL->residue_index[s1][r1][b+SEQ2];
- t_r=CL->residue_index[s1][r1][b+R2];
- for ( c=1; c< CL->residue_index[t_s][t_r][0]; c+=ICHUNK)
- {
- q_s=CL->residue_index[t_s][t_r][c+SEQ2];
- q_r=CL->residue_index[t_s][t_r][c+R2];
- hasch1[q_s][q_r]=0;
- n_hasch1[q_s][q_r]=0;
- }
- }
- }
- }
-
-
- score=(score*SCORE_K)/((ns1-n_gap1)*(ns2-n_gap2));
- score=(CL->normalise)?((score*CL->normalise)/CL->max_ext_value):score;
-
- return (int)(score-SCORE_K*CL->nomatch);
- }
-
-
-int fast_get_dp_cost ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL)
- {
- /*WARNING: WORKS ONLY WITH List to Extend*/
-
- double score=0;
-
- int a,b;
- int n_gap1=0;
- int n_gap2=0;
-
- int s1, rs1, r1, t_r, t_s, s2, rs2, r2;
- static int **hasch1;
- static int **hasch2;
-
- static int **n_hasch1;
- static int **n_hasch2;
-
- static int **is_in_col1;
- static int **is_in_col2;
-
-
-
-
- if ( !hasch1)
- {
-
- hasch1=declare_int( (CL->S)->nseq, (CL->S)->max_len+1);
- hasch2=declare_int( (CL->S)->nseq, (CL->S)->max_len+1);
- n_hasch1=declare_int ( (CL->S)->nseq, (CL->S)->max_len+1);
- n_hasch2=declare_int( (CL->S)->nseq, (CL->S)->max_len+1);
- is_in_col1=declare_int( (CL->S)->nseq, (CL->S)->max_len+1);
- is_in_col2=declare_int( (CL->S)->nseq, (CL->S)->max_len+1);
- }
-
- for ( a=0; a< ns1; a++)
- {
- rs1= list1[a];
- s1=A->order[rs1][0];
- r1=pos1[rs1][col1];
-
- if (r1<0)n_gap1++;
- else
- {
- is_in_col1[s1][r1]=1;
- for (b=1; b< CL->residue_index[s1][r1][0]; b+=ICHUNK)
- {
- t_s=CL->residue_index[s1][r1][b+SEQ2];
- t_r=CL->residue_index[s1][r1][b+R2];
- hasch1[t_s][t_r]+=CL->residue_index[s1][r1][b+WE];
- n_hasch1[t_s][t_r]++;
- }
- }
- }
-
-
- for ( a=0; a< ns2; a++)
- {
- rs2=list2[a];
- s2=A->order[rs2][0];
- r2=pos2[rs2][col2];
-
- if (r2<0)n_gap2++;
- else
- {
- is_in_col2[s2][r2]=1;
- for (b=1; b< CL->residue_index[s2][r2][0]; b+=ICHUNK)
- {
- t_s=CL->residue_index[s2][r2][b+SEQ2];
- t_r=CL->residue_index[s2][r2][b+R2];
-
- hasch2[t_s][t_r]+=CL->residue_index[s2][r2][b+WE];
- n_hasch2[t_s][t_r]++;
- }
- }
- }
- /*return 2;*/
-
- if ( ns2<ns1)
- {
- for ( a=0; a< ns2; a++)
- {
- rs2=list2[a];
- s2=A->order[rs2][0];
- r2=pos1[rs2][col2];
-
- if (r2<0);
- else
- {
- for (b=1; b< CL->residue_index[s2][r2][0]; b+=ICHUNK)
- {
- t_s=CL->residue_index[s2][r2][b+SEQ2];
- t_r=CL->residue_index[s2][r2][b+R2];
-
- if ( hasch2[t_s][t_r] && hasch1[t_s][t_r]&& !(is_in_col1[t_s][t_r] || is_in_col2[t_s][t_r]))
- {
- score+=MIN(hasch2[t_s][t_r]*(n_hasch1[t_s][t_r]),hasch1[t_s][t_r]*(n_hasch2[t_s][t_r]));
- }
- else if ( hasch2[t_s][t_r] && is_in_col1[t_s][t_r])
- {
- score+=hasch2[t_s][t_r]*(n_hasch1[t_s][t_r]+1);
- }
- else if (hasch1[t_s][t_r] && is_in_col2[t_s][t_r])
- {
- score+=hasch1[t_s][t_r]*(n_hasch2[t_s][t_r]+1);
- }
- hasch2[t_s][t_r]=0;
- n_hasch2[t_s][t_r]=0;
- }
- hasch2[s2][r2]=0;
- is_in_col2[s2][r2]=0;
- }
- }
-
-
- for ( a=0; a< ns1; a++)
- {
- rs1= list1[a];
- s1=A->order[rs1][0];
- r1=pos1[rs1][col1];
-
- if (r1<0);
- else
- {
- is_in_col1[s1][r1]=0;
- hasch1[s1][r1]=0;
- for (b=1; b< CL->residue_index[s1][r1][0]; b+=ICHUNK)
- {
- t_s=CL->residue_index[s1][r1][b+SEQ2];
- t_r=CL->residue_index[s1][r1][b+R2];
-
- hasch1[t_s][t_r]=0;
- n_hasch1[t_s][t_r]=0;
- }
- }
- }
- }
- else
- {
- for ( a=0; a< ns1; a++)
- {
- rs1=list1[a];
- s1=A->order[rs1][0];
- r1=pos1[rs1][col1];
-
- if (r1<0);
- else
- {
- for (b=1; b< CL->residue_index[s1][r1][0]; b+=ICHUNK)
- {
- t_s=CL->residue_index[s1][r1][b+SEQ2];
- t_r=CL->residue_index[s1][r1][b+R2];
-
- if ( hasch1[t_s][t_r] && hasch2[t_s][t_r]&& !(is_in_col2[t_s][t_r] || is_in_col1[t_s][t_r]))
- {
- score+=MIN(hasch1[t_s][t_r]*(n_hasch2[t_s][t_r]),hasch2[t_s][t_r]*(n_hasch1[t_s][t_r]));
- }
- else if ( hasch1[t_s][t_r] && is_in_col2[t_s][t_r])
- {
- score+=hasch1[t_s][t_r]*(n_hasch2[t_s][t_r]+1);
- }
- else if (hasch2[t_s][t_r] && is_in_col1[t_s][t_r])
- {
- score+=hasch2[t_s][t_r]*(n_hasch1[t_s][t_r]+1);
- }
- hasch1[t_s][t_r]=0;
- n_hasch1[t_s][t_r]=0;
- }
- hasch1[s1][r1]=0;
- is_in_col1[s1][r1]=0;
- }
- }
-
-
- for ( a=0; a< ns2; a++)
- {
- rs2= list2[a];
- s2=A->order[rs2][0];
- r2=pos1[rs2][col2];
-
- if (r2<0);
- else
- {
- is_in_col2[s2][r2]=0;
- hasch1[s2][r2]=0;
- for (b=1; b< CL->residue_index[s2][r2][0]; b+=ICHUNK)
- {
- t_s=CL->residue_index[s2][r2][b+SEQ2];
- t_r=CL->residue_index[s2][r2][b+R2];
-
- hasch2[t_s][t_r]=0;
- n_hasch2[t_s][t_r]=0;
- }
- }
- }
- }
- score=(score*SCORE_K)/((ns1-n_gap1)*(ns2-n_gap2));
- score=(CL->normalise)?((score*CL->normalise)/CL->max_ext_value):score;
-
- return (int)(score-SCORE_K*CL->nomatch);
- }
-
-int fast_get_dp_cost_2 ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL)
- {
- double score=0;
-
- int a, b, s1, s2,r1, r2;
- static int n_pair;
-
- int s;
- int res_res=0;
- int rs1, rs2;
- static int last_ns1;
- static int last_ns2;
- static int last_top1;
- static int last_top2;
- static int **check_list;
-
-
- /*New heuristic get dp_cost on a limited number of pairs*/
- /*This is the current default*/
-
-
- if ( last_ns1==ns1 && last_top1==list1[0] && last_ns2==ns2 && last_top2==list2[0]);
- else
- {
-
-
- last_ns1=ns1;
- last_ns2=ns2;
- last_top1=list1[0];
- last_top2=list2[0];
- if ( check_list) free_int (check_list, -1);
- check_list=declare_int ( (CL->S)->nseq*(CL->S)->nseq, 3);
-
- for ( n_pair=0,a=0; a< ns1; a++)
- {
- s1 =list1[a];
- rs1=A->order[s1][0];
- for ( b=0; b< ns2; b++, n_pair++)
- {
- s2 =list2[b];
- rs2=A->order[s2][0];
- check_list[n_pair][0]=s1;
- check_list[n_pair][1]=s2;
- check_list[n_pair][2]=(!CL->DM)?0:(CL->DM)->similarity_matrix[rs1][rs2];
- }
- sort_int ( check_list, 3, 2, 0, n_pair-1);
- }
- }
-
- if ( !CL->evaluate_residue_pair){fprintf ( stderr, "\nWARNING: CL->evaluate_residue_pair Not set\nSet to: extend_residue_pair\n");CL->evaluate_residue_pair= extend_residue_pair;}
-
-
- for ( a=n_pair-1; a>=0; a--)
- {
- s1= check_list[a][0];
- rs1=A->order[s1][0];
- r1 =pos1[s1][col1];
-
- s2= check_list[a][1];
- rs2=A->order[s2][0];
- r2 =pos2[s2][col2];
-
- if ( r1>0 && r2 >0)
- {res_res++;}
- if ( rs1>rs2)
- {
- SWAP (rs1, rs2);
- SWAP (r1, r2);
- }
-
- if ((s=(CL->evaluate_residue_pair)(CL, rs1, r1, rs2, r2))!=UNDEFINED) score+=s;
- else
- {
-
- return UNDEFINED;
- }
- if ( res_res>=CL->max_n_pair && CL->max_n_pair!=0)a=0;
- }
-
- score=(res_res==0)?0:( (score)/res_res);
- score=score-SCORE_K*CL->nomatch;
-
- return (int)score;
- }
-
-
-
-
-int slow_get_dp_cost ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL)
- {
- double score=0;
-
- int a, b, s1, s2,r1, r2;
- int s;
- int gap_gap=0;
- int gap_res=0;
- int res_res=0;
- int rs1, rs2;
- static int last_tag;
- static int *dummy;
-
- if (col1<0 || col2<0) return 0;
- if ( last_tag !=A->random_tag)
- {
- last_tag=A->random_tag;
- if (!dummy)
- {
- dummy=vcalloc (10, sizeof(int));
- dummy[0]=1;/*Number of Amino acid types on colum*/
- dummy[1]=5;/*Length of Dummy*/
- dummy[3]='\0';/*Amino acid*/
- dummy[4]=1; /*Number of occurences*/
- dummy[5]=100; /*Frequency in the MSA column*/
- }
- }
-
- if ( !CL->evaluate_residue_pair){fprintf ( stderr, "\nWARNING: CL->evaluate_residue_pair Not set\nSet to: extend_residue_pair\n");CL->evaluate_residue_pair= extend_residue_pair;}
-
- for ( a=0; a< ns1; a++)
- {
- for ( b=0; b<ns2; b++)
- {
-
- s1 =list1[a];
- rs1=A->order[s1][0];
- r1 =pos1[s1][col1];
-
- s2 =list2[b];
- rs2=A->order[s2][0];
- r2 =pos2[s2][col2];
-
- if ( rs1>rs2)
- {
- SWAP (rs1, rs2);
- SWAP (r1, r2);
- }
-
- if (r1==0 && r2==0)gap_gap++;
- else if ( r1<0 || r2<0) gap_res++;
- else
- {
- res_res++;
- if ((s=(CL->evaluate_residue_pair)(CL, rs1, r1, rs2, r2))!=UNDEFINED) score+=s;
- else
- {
-
- return UNDEFINED;
- }
- }
-
- }
- }
-
-
- score=(res_res==0)?0:( (score)/res_res);
-
- return score-SCORE_K*CL->nomatch;
-
- }
-int slow_get_dp_cost_pc ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL)
- {
- double score=0;
-
- int a, b, s1, s2,r1, r2;
- int s;
- int gap_gap=0;
- int gap_res=0;
- int res_res=0;
- int rs1, rs2;
- static int last_tag;
- static int *dummy;
-
- if (col1<0 || col2<0) return 0;
- if ( last_tag !=A->random_tag)
- {
- last_tag=A->random_tag;
- if (!dummy)
- {
- dummy=vcalloc (10, sizeof(int));
- dummy[0]=1;/*Number of Amino acid types on colum*/
- dummy[1]=5;/*Length of Dummy*/
- dummy[3]='\0';/*Amino acid*/
- dummy[4]=1; /*Number of occurences*/
- dummy[5]=100; /*Frequency in the MSA column*/
- }
- }
-
- if ( !CL->evaluate_residue_pair){fprintf ( stderr, "\nWARNING: CL->evaluate_residue_pair Not set\nSet to: extend_residue_pair\n");CL->evaluate_residue_pair= extend_residue_pair;}
-
- for ( a=0; a< ns1; a++)
- {
- for ( b=0; b<ns2; b++)
- {
-
- s1 =list1[a];
- rs1=A->order[s1][0];
- r1 =pos1[s1][col1];
-
- s2 =list2[b];
- rs2=A->order[s2][0];
- r2 =pos2[s2][col2];
-
- if ( rs1>rs2)
- {
- SWAP (rs1, rs2);
- SWAP (r1, r2);
- }
-
- if (r1==0 && r2==0)gap_gap++;
- else if ( r1<0 || r2<0) gap_res++;
- else
- {
- res_res++;
- if ((s=residue_pair_extended_list_pc(CL, rs1, r1, rs2, r2))!=UNDEFINED) score+=s;
- else
- {
-
- return UNDEFINED;
- }
- }
-
- }
- }
-
-
- score=(res_res==0)?0:( (score)/res_res);
-
- return score;
-
- }
-int slow_get_dp_cost_test ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL)
- {
- double score=0;
-
- int a, b, s1, s2,r1, r2;
- int gap_gap=0, gap_res=0, res_res=0, rs1, rs2;
-
- for ( a=0; a< ns1; a++)
- {
- for ( b=0; b<ns2; b++)
- {
-
- s1 =list1[a];
- rs1=A->order[s1][0];
- r1 =pos1[s1][col1];
-
- s2 =list2[b];
- rs2=A->order[s2][0];
- r2 =pos2[s2][col2];
-
- if ( rs1>rs2)
- {
- SWAP (rs1, rs2);
- SWAP (r1, r2);
- }
-
- if (r1==0 && r2==0)gap_gap++;
- else if ( r1<0 || r2<0) gap_res++;
- else
- {
- res_res++;
- score+=residue_pair_extended_list_raw (CL, rs1, r1, rs2, r2);
- }
- }
- }
-
- return (int)(score*10)/(ns1*ns2);
- }
-
-int sw_get_dp_cost ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2, Constraint_list *CL)
- {
- int a, b;
- int s1,r1,rs1;
- int s2,r2,rs2;
-
-
-
-
- for ( a=0; a< ns1; a++)
- {
- s1 =list1[a];
- rs1=A->order[s1][0];
- r1 =pos1[s1][col1];
- if ( r1<=0)continue;
- for ( b=0; b< ns2; b++)
- {
-
-
- s2 =list2[b];
- rs2=A->order[s2][0];
- r2 =pos2[s2][col2];
-
- if (r2<=0)continue;
-
-
- if (sw_pair_is_defined (CL, rs1, r1, rs2, r2)==UNDEFINED)return UNDEFINED;
- }
- }
-
- return slow_get_dp_cost ( A, pos1, ns1, list1, col1, pos2, ns2, list2,col2, CL);
-
- }
-
-
-
-
-
-
-
-
-int get_domain_dp_cost ( Alignment *A, int**pos1, int ns1, int*list1, int col1, int**pos2, int ns2, int*list2, int col2,Constraint_list *CL , int scale , int gop, int gep)
-
- {
- int a, b, s1, s2,r1, r2;
- static int *entry;
- int *r;
- int score=0;
- int gap_gap=0;
- int gap_res=0;
- int res_res=0;
- int rs1, rs2;
- int flag_list_is_aa_sub_mat=0;
- int p;
-
-/*Needs to be cleanned After Usage*/
-
-
-
- if ( entry==NULL) entry=vcalloc (LIST_N_FIELDS , sizeof (int));
-
- for (a=0; a< ns1; a++)
- {
- s1=list1[a];
- rs1=A->order[s1][0];
- for ( b=0; b<ns2; b++)
- {
- s2 =list2[b];
- rs2=A->order[s2][0];
-
- entry[SEQ1]=rs1;
- entry[SEQ2]=rs2;
- r1=entry[R1]=pos1[s1][col1];
- r2=entry[R2]=pos2[s2][col2];
-
- if ( !flag_list_is_aa_sub_mat)
- {
- if ( r1==r2 && rs1==rs2)
- {
-
- return UNDEFINED;
- }
- else if (r1==0 && r2==0)
- {
- gap_gap++;
- }
- else if ( r1<=0 || r2<=0)
- {
- gap_res++;
- }
- else if ((r=main_search_in_list_constraint ( entry,&p,4,CL))!=NULL)
- {
- res_res++;
-
- if (r[WE]!=UNDEFINED)
- {
- score+=(r[WE]*SCORE_K)+scale;
- }
- else
- {
- fprintf ( stderr, "**");
- return UNDEFINED;
- }
- }
- }
- }
- }
- return score;
- score=((res_res+gap_res)==0)?0:score/(res_res+gap_res);
- return score;
- }
-
-/*********************************************************************************************/
-/* */
-/* FUNCTIONS FOR ANALYSING AL OR MATRIX */
-/* */
-/*********************************************************************************************/
-
-int aln2n_res ( Alignment *A, int start, int end)
- {
- int a, b;
- int score=0;
-
- for ( a=start; a<end; a++)for ( b=0; b< A->nseq; b++)score+=!is_gap(A->seq_al[b][a]);
- return score;
- }
-
-float get_gop_scaling_factor ( int **matrix,float id, int l1, int l2)
- {
- return id* get_avg_matrix_mm(matrix, AA_ALPHABET);
- }
-
-float get_avg_matrix_mm ( int **matrix, char *alphabet)
- {
- int a, b;
- float naa;
- float gop;
- int l;
-
-
- l=MIN(20,(int)strlen (alphabet));
- for (naa=0, gop=0,a=0; a<l; a++)
- for ( b=0; b<l; b++)
- {
- if ( a!=b)
- {
- gop+=matrix[alphabet[a]-'A'][alphabet[b]-'A'];
- naa++;
- }
- }
-
- gop=gop/naa;
- return gop;
- }
-float get_avg_matrix_match ( int **matrix, char *alphabet)
- {
- int a;
- float gop;
- int l;
-
-
-
-
- l=MIN(20,(int)strlen (alphabet));
- for (gop=0,a=0; a<l; a++)
- gop+=matrix[alphabet[a]-'A'][alphabet[a]-'A'];
-
- gop=gop/l;
- return gop;
- }
-
-float get_avg_matrix_diff ( int **matrix1,int **matrix2, char *alphabet)
- {
- int a, b;
- float naa;
- float gop;
- int l,v1;
-
-
-
-
- l=MIN(20,(int)strlen (alphabet));
- for (naa=0, gop=0,a=0; a<l; a++)
- for (b=0; b<l; b++)
- {
- v1=matrix1[alphabet[a]-'A'][alphabet[a]-'A']-matrix2[alphabet[b]-'A'][alphabet[b]-'A'];
- gop+=v1*v1;
- naa++;
- }
-
- gop=gop/l;
- return gop;
- }
-
-float* set_aa_frequencies ()
- {
-
- float *frequency;
- /*frequencies tqken from psw*/
-
- frequency=vcalloc (100, sizeof (float));
- frequency ['x'-'A']=0.0013;
- frequency ['a'-'A']=0.0076;
- frequency ['c'-'A']=0.0176;
- frequency ['d'-'A']=0.0529;
- frequency ['e'-'A']=0.0628;
- frequency ['f'-'A']=0.0401;
- frequency ['g'-'A']=0.0695;
- frequency ['h'-'A']=0.0224;
- frequency ['i'-'A']=0.0561;
- frequency ['k'-'A']=0.0584;
- frequency ['l'-'A']=0.0922;
- frequency ['m'-'A']=0.0236;
- frequency ['n'-'A']=0.0448;
- frequency ['p'-'A']=0.0500;
- frequency ['q'-'A']=0.0403;
- frequency ['r'-'A']=0.0523;
- frequency ['s'-'A']=0.0715;
- frequency ['t'-'A']=0.0581;
- frequency ['v'-'A']=0.0652;
- frequency ['w'-'A']=0.0128;
- frequency ['y'-'A']=0.0321;
- frequency ['X'-'A']=0.0013;
- frequency ['A'-'A']=0.0076;
- frequency ['C'-'A']=0.0176;
- frequency ['D'-'A']=0.0529;
- frequency ['E'-'A']=0.0628;
- frequency ['F'-'A']=0.0401;
- frequency ['G'-'A']=0.0695;
- frequency ['H'-'A']=0.0224;
- frequency ['I'-'A']=0.0561;
- frequency ['J'-'A']=0.0584;
- frequency ['L'-'A']=0.0922;
- frequency ['M'-'A']=0.0236;
- frequency ['N'-'A']=0.0448;
- frequency ['P'-'A']=0.0500;
- frequency ['Q'-'A']=0.0403;
- frequency ['R'-'A']=0.0523;
- frequency ['S'-'A']=0.0715;
- frequency ['T'-'A']=0.0581;
- frequency ['V'-'A']=0.0652;
- frequency ['W'-'A']=0.0128;
- frequency ['Y'-'A']=0.0321;
- return frequency;
- }
-
-float measure_matrix_pos_avg (int **matrix,char *alphabet)
-{
- float naa=0, tot=0;
- int a, b;
-
-
- for ( tot=0,a=0; a< 20; a++)
- for ( b=0; b<20; b++)
- {
- if (matrix[alphabet[a]-'A'][alphabet[b]-'A']>=0){naa++;tot+=matrix[alphabet[a]-'A'][alphabet[b]-'A'];}
-
- }
- return tot/naa;
-}
-
-float measure_matrix_enthropy (int **matrix,char *alphabet)
- {
-
- int a, b;
- double s, p, q, h=0, tq=0;
- float lambda;
- float *frequency;
- /*frequencies tqken from psw*/
-
- frequency=set_aa_frequencies ();
-
-
- lambda=compute_lambda(matrix,alphabet);
- fprintf ( stderr, "\nLambda=%f", (float)lambda);
-
- for ( a=0; a< 20; a++)
- for ( b=0; b<=a; b++)
- {
- s=matrix[alphabet[a]-'A'][alphabet[b]-'A'];
-
-
- p=frequency[alphabet[a]-'A']*frequency[alphabet[b]-'A'];
-
- if ( p==0)continue;
-
- q=exp(lambda*s+log(p));
-
- tq+=q;
- h+=q*log(q/p)*log(2);
-
- }
-
- fprintf ( stderr,"\ntq=%f\n", (float)tq);
-
- return (float) h;
- }
-float compute_lambda (int **matrix,char *alphabet)
-{
-
- int a, b;
- double lambda, best_lambda=0, delta, best_delta=0, p, tq,s;
- static float *frequency;
-
- if ( !frequency)frequency=set_aa_frequencies ();
-
- for ( lambda=0.001; lambda<1; lambda+=0.005)
- {
- tq=0;
- for ( a=0; a< 20; a++)
- for ( b=0; b<20; b++)
- {
- p=frequency[alphabet[a]-'A']*frequency[alphabet[b]-'A'];
- s=matrix[alphabet[a]-'A'][alphabet[b]-'A'];
- tq+=exp(lambda*s+log(p));
- }
- delta=fabs(1-tq);
- if (lambda==0.001)
- {
- best_delta=delta;
- best_lambda=lambda;
- }
- else
- {
- if (delta<best_delta)
- {
- best_delta=delta;
- best_lambda=lambda;
- }
- }
-
- fprintf ( stderr, "\n%f %f ", lambda, tq);
- if ( tq>1)break;
- }
- fprintf ( stderr, "\nRESULT: %f %f ", best_lambda, best_delta);
- return (float) best_lambda;
-}
-
-
-
-float evaluate_random_match (char *mat, int n, int len,char *alp)
-{
- int **matrix;
- matrix=read_matrice ( mat);
- fprintf ( stderr, "Matrix=%15s ", mat);
- return evaluate_random_match2 (matrix, n,len,alp);
-
-}
-
-float evaluate_random_match2 (int **matrix, int n, int len,char *alp)
-{
- int a, b, c, d, c1, c2, tot;
- static int *list;
- static float *freq;
- float score_random=0;
- float score_id=0;
- float score_good=0;
- float tot_len=0;
- float tot_try=0;
-
-
- if ( !list)
- {
- vsrand(0);
- freq=set_aa_frequencies ();
- list=vcalloc ( 10000, sizeof (char));
- }
-
- for (tot=0,c=0,a=0;a<20; a++)
- {
- b=freq[alp[a]-'A']*1000;
- tot+=b;
- for (d=0; d<b; d++, c++)
- {
- list[c]=alp[a];
- }
- }
-
-
- for (a=0; a< len*n; a++)
- {
- c1=rand()%tot;
- c2=rand()%tot;
- score_random+=matrix[list[c1]-'A'][list[c2]-'A'];
- score_id+=matrix[list[c1]-'A'][list[c1]-'A'];
- }
- while (tot_len< len*n)
- {
- tot_try++;
- c1=rand()%tot;
- c2=rand()%tot;
- if ( matrix[list[c1]-'A'][list[c2]-'A']>=0){score_good+=matrix[list[c1]-'A'][list[c2]-'A']; tot_len++;}
- }
-
-
- score_random=score_random/tot_len;
- score_id=score_id/tot_len;
- score_good=score_good/tot_len;
-
- fprintf ( stderr, "Random=%8.3f Id=%8.3f Good=%8.3f [%7.2f]\n",score_random, score_id, score_good, tot_len/tot_try);
-
- return score_random;
-}
-float compare_two_mat (char *mat1,char*mat2, int n, int len,char *alp)
-{
- int **matrix1, **matrix2;
-
- evaluate_random_match (mat1, n, len,alp);
- evaluate_random_match (mat2, n, len,alp);
- matrix1=read_matrice ( mat1);
- matrix2=read_matrice ( mat2);
- matrix1=rescale_matrix(matrix1, 10, alp);
- matrix2=rescale_matrix(matrix2, 10, alp);
- compare_two_mat_array(matrix1,matrix2, n, len,alp);
- return 0;
-}
-
-
-int ** rescale_two_mat (char *mat1,char*mat2, int n, int len,char *alp)
-{
- float lambda;
- int **matrix1, **matrix2;
-
- lambda=measure_lambda2 (mat1, mat2, n, len, alp)*10;
-
- fprintf ( stderr, "\nLambda=%.2f", lambda);
- matrix2=read_matrice(mat2);
- matrix2=neg_matrix2pos_matrix(matrix2);
- matrix2=rescale_matrix( matrix2, lambda,"abcdefghiklmnpqrstvwxyz");
-
- matrix1=read_matrice(mat1);
- matrix1=neg_matrix2pos_matrix(matrix1);
- matrix1=rescale_matrix( matrix1,10,"abcdefghiklmnpqrstvwxyz");
-
- output_matrix_header ( "stdout", matrix2, alp);
- evaluate_random_match2(matrix1, 1000, 100, alp);
- evaluate_random_match2(matrix2, 1000, 100, alp);
- compare_two_mat_array(matrix1,matrix2, n, len,alp);
-
- return matrix2;
-}
-float measure_lambda2(char *mat1,char*mat2, int n, int len,char *alp)
-{
- int **m1, **m2;
- float f1, f2;
-
- m1=read_matrice (mat1);
- m2=read_matrice (mat2);
-
- m1=neg_matrix2pos_matrix(m1);
- m2=neg_matrix2pos_matrix(m2);
-
- f1=measure_matrix_pos_avg( m1, alp);
- f2=measure_matrix_pos_avg( m2, alp);
-
- return f1/f2;
-}
-
-
-float measure_lambda (char *mat1,char*mat2, int n, int len,char *alp)
-{
- int c;
- int **matrix1, **matrix2, **mat;
- float a;
- float best_quality=0, quality=0, best_lambda=0;
-
- matrix1=read_matrice ( mat1);
- matrix2=read_matrice ( mat2);
- matrix1=rescale_matrix(matrix1, 10, alp);
- matrix2=rescale_matrix(matrix2, 10, alp);
-
- for (c=0, a=0.1; a< 2; a+=0.05)
- {
- fprintf ( stderr, "Lambda=%.2f\n", a);
- mat=duplicate_int (matrix2,-1,-1);
- mat=rescale_matrix(mat, a, alp);
- quality=compare_two_mat_array(matrix1,mat, n, len,alp);
- quality=MAX((-quality),quality);
-
- if (c==0 || (best_quality>quality))
- {
- c=1;
- fprintf ( stderr, "*");
- best_quality=quality;
- best_lambda=a;
- }
-
-
- evaluate_random_match2(mat, 1000, 100, alp);
- evaluate_random_match2(matrix1, 1000, 100, alp);
- free_int (mat, -1);
- }
-
- return best_lambda;
-
-}
-
-float compare_two_mat_array (int **matrix1,int **matrix2, int n, int len,char *alp)
-{
- int a, b, c, d, c1, c2, tot;
- static int *list;
- static float *freq;
- float delta_random=0;
- float delta2_random=0;
-
- float delta_id=0;
- float delta2_id=0;
-
- float delta_good=0;
- float delta2_good=0;
-
- float delta;
-
- float tot_len=0;
- float tot_try=0;
-
-
-
- if ( !list)
- {
- vsrand(0);
- freq=set_aa_frequencies ();
- list=vcalloc ( 10000, sizeof (char));
- }
-
- for (tot=0,c=0,a=0;a<20; a++)
- {
- b=freq[alp[a]-'A']*1000;
- tot+=b;
- for (d=0; d<b; d++, c++)
- {
- list[c]=alp[a];
- }
- }
-
-
- for (a=0; a< len*n; a++)
- {
- c1=rand()%tot;
- c2=rand()%tot;
- delta=matrix1[list[c1]-'A'][list[c2]-'A']-matrix2[list[c1]-'A'][list[c2]-'A'];
- delta_random+=delta;
- delta2_random+=MAX(delta,(-delta));
-
- delta=matrix1[list[c1]-'A'][list[c1]-'A']-matrix2[list[c1]-'A'][list[c1]-'A'];
- delta_id+=delta;
- delta2_id+=MAX(delta,(-delta));
- }
- while (tot_len< len*n)
- {
- tot_try++;
- c1=rand()%tot;
- c2=rand()%tot;
- if ( matrix1[list[c1]-'A'][list[c2]-'A']>=0 || matrix2[list[c1]-'A'][list[c2]-'A'] )
- {
- delta=matrix1[list[c1]-'A'][list[c2]-'A']-matrix2[list[c1]-'A'][list[c2]-'A'];
- delta_good+=delta;
- delta2_good+=MAX(delta,(-delta));
- tot_len++;
- }
- }
-
-
- delta_random=delta_random/tot_len;
- delta2_random=delta2_random/tot_len;
-
-
- delta_id=delta_id/tot_len;
- delta2_id=delta2_id/tot_len;
-
- delta_good=delta_good/tot_len;
- delta2_good=delta2_good/tot_len;
-
-
- fprintf ( stderr, "\tRand=%8.3f %8.3f\n\tId =%8.3f %8.3f\n\tGood=%8.3f %8.3f\n",delta_random, delta2_random, delta_id,delta2_id, delta_good,delta2_good);
-
- return delta_good;
-}
-
-
-
-int ** rescale_matrix ( int **matrix, float lambda, char *alp)
-{
- int a, b;
-
-
- for ( a=0; a< 20; a++)
- for ( b=0; b< 20; b++)
- {
- matrix[alp[a]-'A'][alp[b]-'A']= matrix[alp[a]-'A'][alp[b]-'A']*lambda;
- }
- return matrix;
-}
-int **mat2inverted_mat (int **matrix, char *alp)
-{
- int a, b, min, max, v,l;
- int c1,c2, C1, C2;
-
- l=(int)strlen (alp);
- min=max=matrix[alp[0]-'A'][alp[0]-'A'];
- for ( a=0; a<l; a++)
- for ( b=0; b< l; b++)
- {
- v=matrix[alp[a]-'A'][alp[b]-'A'];
- min=MIN(min,v);
- max=MAX(max,v);
- }
- for ( a=0; a<l; a++)
- for ( b=0; b< l; b++)
- {
- v=matrix[alp[a]-'A'][alp[b]-'A'];
- v=max-v;
-
- c1=tolower(alp[a])-'A';
- c2=tolower(alp[b])-'A';
-
- C1=toupper(alp[a])-'A';
- C2=toupper(alp[b])-'A';
- matrix[C1][C2]=matrix[c1][c2]=matrix[C1][c2]=matrix[c1][C2]=v;
- }
- return matrix;
-}
-void output_matrix_header ( char *name, int **matrix, char *alp)
-{
- int a, b;
- FILE *fp;
- char *nalp;
- int l;
- nalp=vcalloc ( 1000, sizeof (char));
-
-
-
- sprintf ( nalp, "ABCDEFGHIKLMNPQRSTVWXYZ");
- l=(int)strlen (nalp);
-
- fp=vfopen ( name, "w");
- fprintf (fp, "\nint []={\n");
-
- for (a=0; a<l; a++)
- {
- for ( b=0; b<=a; b++)
- fprintf ( fp, "%3d, ",matrix[nalp[a]-'A'][nalp[b]-'A']);
- fprintf ( fp, "\n");
- }
- fprintf (fp, "};\n");
-
- vfclose (fp);
-}
-
-
-float ** initialise_aa_physico_chemical_property_table (int *n)
-{
- FILE *fp;
- int a, b,c;
- float **p;
- char c1;
- float v1, max, min;
- int in=0;
-
- n[0]=0;
- fp=vfopen ("properties.txt", "r");
- while ((c=fgetc(fp))!=EOF)n[0]+=(c=='#');
- vfclose (fp);
-
-
-
- p=declare_float ( n[0], 'z'+1);
- fp=vfopen ("properties.txt", "r");
- n[0]=0;
- while ((c=fgetc(fp))!=EOF)
- {
- if (c=='#'){in=0;while (fgetc(fp)!='\n');}
- else
- {
- if ( !in){in=1; ++n[0];}
- fscanf (fp, "%f %c %*s",&v1, &c1 );
- p[n[0]-1][tolower(c1)]=v1;
- while ( (c=fgetc(fp))!='\n');
- }
- }
- vfclose (fp);
-
- /*rescale so that Delta max=10*/
- for (a=0; a<n[0]; a++)
- {
- min=max=p[a]['a'];
- for (b='a'; b<='z'; b++)
- {
- min=(p[a][b]<min)?p[a][b]:min;
- max=(p[a][b]>max)?p[a][b]:max;
- }
- for (b='a'; b<='z'; b++)
- {
- p[a][b]=((p[a][b]-min)/(max-min))*10;
-
- }
- }
-
- return p;
-}
-Constraint_list * choose_extension_mode ( char *extend_mode, Constraint_list *CL)
-{
- //evaluation_functions: residues start at 1, sequences at 0;
-
- if ( !CL)
- {
- fprintf ( stderr, "\nWarning: CL was not set");
- return CL;
- }
- else if ( strm ( extend_mode, "rna0"))
- {
- CL->evaluate_residue_pair=residue_pair_extended_list;
- CL->get_dp_cost =slow_get_dp_cost;
- }
- else if ( strm ( extend_mode, "rna1") || strm (extend_mode, "rna"))
- {
- CL->evaluate_residue_pair=residue_pair_extended_list4rna1;
- CL->get_dp_cost =slow_get_dp_cost;
- }
- else if ( strm ( extend_mode, "rna2"))
- {
- CL->evaluate_residue_pair=residue_pair_extended_list4rna2;
- CL->get_dp_cost =slow_get_dp_cost;
- }
- else if ( strm ( extend_mode, "rna3"))
- {
- CL->evaluate_residue_pair=residue_pair_extended_list4rna3;
- CL->get_dp_cost =slow_get_dp_cost;
- }
- else if ( strm ( extend_mode, "rna4"))
- {
- CL->evaluate_residue_pair=residue_pair_extended_list4rna4;
- CL->get_dp_cost =slow_get_dp_cost;
- }
- else if ( strm ( extend_mode, "pc") && !CL->M)
- {
- CL->evaluate_residue_pair=residue_pair_extended_list_pc;
- CL->get_dp_cost =slow_get_dp_cost;
- }
- else if ( strm ( extend_mode, "triplet") && !CL->M)
- {
- CL->evaluate_residue_pair=residue_pair_extended_list;
- CL->get_dp_cost =get_dp_cost;
- }
- else if ( strm ( extend_mode, "relative_triplet") && !CL->M)
- {
- CL->evaluate_residue_pair=residue_pair_relative_extended_list;
- CL->get_dp_cost =fast_get_dp_cost_2;
- }
- else if ( strm ( extend_mode, "g_coffee") && !CL->M)
- {
- CL->evaluate_residue_pair=residue_pair_extended_list_g_coffee;
- CL->get_dp_cost =slow_get_dp_cost;
- }
- else if ( strm ( extend_mode, "g_coffee_quadruplets") && !CL->M)
- {
- CL->evaluate_residue_pair=residue_pair_extended_list_g_coffee_quadruplet;
- CL->get_dp_cost =slow_get_dp_cost;
- }
- else if ( strm ( extend_mode, "fast_triplet") && !CL->M)
- {
- CL->evaluate_residue_pair=residue_pair_extended_list;
- CL->get_dp_cost =fast_get_dp_cost;
- }
-
- else if ( strm ( extend_mode, "very_fast_triplet") && !CL->M)
- {
- CL->evaluate_residue_pair=residue_pair_extended_list;
- CL->get_dp_cost =fast_get_dp_cost_2;
- }
- else if ( strm ( extend_mode, "slow_triplet") && !CL->M)
- {
- CL->evaluate_residue_pair=residue_pair_extended_list;
- CL->get_dp_cost =slow_get_dp_cost;
- }
- else if ( strm ( extend_mode, "mixt") && !CL->M)
- {
- CL->evaluate_residue_pair=residue_pair_extended_list_mixt;
- CL->get_dp_cost=slow_get_dp_cost;
- }
- else if ( strm ( extend_mode, "quadruplet") && !CL->M)
- {
- CL->evaluate_residue_pair=residue_pair_extended_list_quadruplet;
- CL->get_dp_cost =get_dp_cost_quadruplet;
- }
- else if ( strm ( extend_mode, "test") && !CL->M)
- {
- CL->evaluate_residue_pair=residue_pair_test_function;
- CL->get_dp_cost =slow_get_dp_cost_test;
- }
- else if ( strm ( extend_mode, "ssp"))
- {
-
- CL->evaluate_residue_pair=evaluate_ssp_matrix_score;
- CL->get_dp_cost=slow_get_dp_cost;
- CL->normalise=1;
- }
- else if ( strm ( extend_mode, "tm"))
- {
-
- CL->evaluate_residue_pair=evaluate_tm_matrix_score;
- CL->get_dp_cost=slow_get_dp_cost;
- CL->normalise=1;
- }
- else if ( strm ( extend_mode, "matrix"))
- {
-
- CL->evaluate_residue_pair=evaluate_matrix_score;
- CL->get_dp_cost=cw_profile_get_dp_cost;
- CL->normalise=1;
- }
- else if ( strm ( extend_mode, "curvature"))
- {
- CL->evaluate_residue_pair=evaluate_curvature_score;
- CL->get_dp_cost=slow_get_dp_cost;
- CL->normalise=1;
- }
- else if ( CL->M)
- {
- CL->evaluate_residue_pair=evaluate_matrix_score;
- CL->get_dp_cost=cw_profile_get_dp_cost;
- CL->normalise=1;
- }
- else
- {
- fprintf ( stderr, "\nERROR: %s is an unknown extend_mode[FATAL:%s]\n", extend_mode, PROGRAM);
- myexit (EXIT_FAILURE);
- }
-
- return CL;
-}
-int ** combine_two_matrices ( int **mat1, int **mat2)
-{
- int naa, re1, re2, Re1, Re2, a, b, u, l;
-
- naa=(int)strlen (BLAST_AA_ALPHABET);
- for ( a=0; a< naa; a++)
- for ( b=0; b< naa; b++)
- {
- re1=BLAST_AA_ALPHABET[a];
- re2=BLAST_AA_ALPHABET[b];
- if (re1=='*' || re2=='*');
- else
- {
-
- Re1=toupper(re1);Re2=toupper(re2);
- re1-='A';re2-='A';Re1-='A';Re2-='A';
-
- l=mat1[re1][re2];
- u=mat2[re1][re2];
- mat1[re1][re2]=mat2[re1][re2]=l;
- mat2[Re1][Re2]=mat2[Re1][Re2]=u;
- }
- }
- return mat1;
-}
-
-/* Off the shelves evaluations */
-/*********************************************************************************************/
-/* */
-/* OFF THE SHELVES EVALUATION */
-/* */
-/*********************************************************************************************/
-
-
-int lat_sum_pair (Alignment *A, char *mat)
-{
- int a,b,c, tot=0, v1, v2, score;
- int **matrix;
-
- matrix=read_matrice (mat);
-
- for (a=0; a<A->nseq; a++)
- for ( b=0; b<A->nseq; b++)
- {
- for (c=1; c<A->len_aln; c++)
- {
- char r11, r12;
-
- r11=A->seq_al[a][c-1];
- r12=A->seq_al[a][c];
- if (is_gap(r11) || is_gap(r12))continue;
- else v1=matrix[r11-'A'][r12-'A'];
-
- r11=A->seq_al[b][c-1];
- r12=A->seq_al[b][c];
- if (is_gap(r11) || is_gap(r12))continue;
- else v2=matrix[r11-'A'][r12-'A'];
-
- score+=(v1-v2)*(v1-v2);
- tot++;
- }
- }
- score=(100*score)/tot;
- return (float)score;
-}
-
-
-
-/* Off the shelves evaluations */
-/*********************************************************************************************/
-/* */
-/* OFF THE SHELVES EVALUATION */
-/* */
-/*********************************************************************************************/
-
-int comp_pair ( int len,char *sa, char *sb, int seqA, int seqB,int *tgp_a, int *tgp_b,int gap_op,int gap_ex, int start, int end,int **matrix,int MODE);
-int score_gap ( int len, char *sa, char *sb,int seqA, int seqB,int *tgp_a, int *tgp_b, int op, int ex, int start, int end, int MODE);
-void evaluate_tgp_decoded_chromosome ( Alignment *A,int **TGP,int start, int end,int MODE);
-int gap_type ( char a, char b);
-
-
-
-float sum_pair ( Alignment*A,char *mat_name, int gap_op, int gap_ex)
- {
- int a,b;
- float pscore=0;
-
- int start, end;
- static int **tgp;
- double score=0;
- int MODE=1;
- int **matrix;
-
- matrix=read_matrice (mat_name);
- matrix=mat2inverted_mat (matrix, "acdefghiklmnpqrstvwy");
-
- start=0;
- end=A->len_aln-1;
-
- if ( tgp==NULL)
- tgp= declare_int (A->nseq,2);
-
- evaluate_tgp_decoded_chromosome ( A,tgp,start, end,MODE);
-
- for ( a=0; a< A->nseq-1; a++)
- for (b=a+1; b<A->nseq; b++)
- {
- pscore= comp_pair (A->len_aln,A->seq_al[a], A->seq_al[b],a, b,tgp[a], tgp[b],gap_op,gap_ex, start, end,matrix, MODE);
- score+=pscore*100;
- /*score+=(double)pscore*(int)(PARAM->OFP)->weight[A->order[a][0]][A->order[b][0]];*//*NO WEIGHTS*/
- }
-
- score=score/(A->nseq*A->nseq);
-
- return (float)score;
- }
-
-int comp_pair ( int len,char *sa, char *sb, int seqA, int seqB,int *tgp_a, int *tgp_b,int gap_op,int gap_ex, int start, int end,int **matrix,int MODE)
- {
- int score=0, a, ex;
-
-
-
- if ( end-start>=0)
- score+= score_gap (len, sa,sb, seqA, seqB,tgp_a, tgp_b, gap_op,gap_ex, start, end,MODE);
-
- ex=gap_ex;
-
-
- for (a=start; a<=end; a++)
- {
- if ( is_gap(sa[a]) || is_gap(sb[a]))
- {
- if (is_gap(sa[a]) && is_gap(sb[a]));
- else
- {
-
- score +=ex;
- }
- }
- else
- {
- score += matrix [sa[a]-'A'][sb[a]-'A'];
-
- }
- }
- return score;
- }
-int score_gap ( int len, char *sa, char *sb,int seqA, int seqB,int *tgp_a, int *tgp_b, int op, int ex, int start, int end, int MODE)
- {
- int a,b;
- int ga=0,gb=0;
- int score=0;
-
-
- int right_gap, left_gap;
-
-
-
-
-
- int type;
- int flag1=0;
- int flag2=0;
- int continue_loop;
- int sequence_pattern[2][3];
- int null_gap;
- int natural_gap=1;
-
- /*op= gor_gap_op ( 0,seqA, seqB, PARAM);
- ex= gor_gap_ext ( 0, seqA, seqB, PARAM);*/
-
-
-
- for (a=start; a<=end; ++a)
- {
-
- type= gap_type ( sa[a], sb[a]);
-
- if ( type==2 && ga<=gb)
- {++ga;
- gb=0;
- score += op;
- }
- else if (type==1 && ga >=gb)
- {
- ++gb;
- ga=0;
- score +=op;
- }
- else if (type==0)
- {
- ga++;
- gb++;
- }
-
- else if (type== -1)
- ga=gb=0;
-
-
- if (natural_gap==0)
- {
- if ( type== -1)
- flag1=flag2=0;
- else if ( type==0)
- flag2=1;
- else if ( (type==flag1) && flag2==1)
- {
- score+=op;
- flag2=0;
- }
- else if ( (type!=flag1) && flag2==1)
- {
- flag1=type;
- flag2=0;
- }
- else if ( flag2==0)
- flag1=type;
- }
- }
- /*gap_type -/-:0, X/X:-1 X/-:1, -/X:2*/
-/*evaluate the pattern of gaps*/
-
- continue_loop=1;
- sequence_pattern[0][0]=sequence_pattern[1][0]=0;
- for ( a=start; a<=end && continue_loop==1; a++)
- {
- left_gap= gap_type ( sa[a], sb[a]);
- if ( left_gap!= 0)
- {
- if ( left_gap==-1)
- {
- sequence_pattern[0][0]=sequence_pattern[1][0]=0;
- continue_loop=0;
- }
- else
- {
- null_gap=0;
- for (b=a; b<=end && continue_loop==1; b++)
- {type=gap_type( sa[b], sb[b]);
- if (type==0)
- null_gap++;
- if ( type!=left_gap && type !=0)
- {
- continue_loop=0;
- sequence_pattern[2-left_gap][0]= b-a-null_gap;
- sequence_pattern [1-(2-left_gap)][0]=0;
- }
- }
- if ( continue_loop==1)
- {
- continue_loop=0;
- sequence_pattern[2-left_gap][0]= b-a-null_gap;
- sequence_pattern [1-(2-left_gap)][0]=0;
- }
- }
- }
- }
-
- sequence_pattern[0][2]=sequence_pattern[1][2]=1;
- for ( a=start; a<=end; a++)
- {
- if ( !is_gap(sa[a]))
- sequence_pattern[0][2]=0;
- if ( !is_gap(sb[a]))
- sequence_pattern[1][2]=0;
-
- }
- continue_loop=1;
- sequence_pattern[0][1]=sequence_pattern[1][1]=0;
- for ( a=end; a>=start && continue_loop==1; a--)
- {
- right_gap= gap_type ( sa[a], sb[a]);
- if ( right_gap!= 0)
- {
- if ( right_gap==-1)
- {
- sequence_pattern[0][1]=sequence_pattern[1][1]=0;
- continue_loop=0;
- }
- else
- {
- null_gap=0;
- for (b=a; b>=start && continue_loop==1; b--)
- {type=gap_type( sa[b], sb[b]);
- if ( type==0)
- null_gap++;
- if ( type!=right_gap && type !=0)
- {
- continue_loop=0;
- sequence_pattern[2-right_gap][1]= a-b-null_gap;
- sequence_pattern [1-(2-right_gap)][1]=0;
- }
- }
- if ( continue_loop==1)
- {
- continue_loop=0;
- sequence_pattern[2-right_gap][1]= a-b-null_gap;
- sequence_pattern [1-(2-right_gap)][1]=0;
- }
- }
- }
- }
-
-/*
-printf ( "\n*****************************************************");
-printf ( "\n%c\n%c", sa[start],sb[start]);
-printf ( "\n%d %d %d",sequence_pattern[0][0] ,sequence_pattern[0][1], sequence_pattern[0][2]);
-printf ( "\n%d %d %d",sequence_pattern[1][0] ,sequence_pattern[1][1], sequence_pattern[1][2]);
-printf ( "\n*****************************************************");
-*/
-
-/*correct the scoring*/
-
-
- if ( MODE==0)
- {
- if ( FABS(tgp_a[0])>1 && (FABS(tgp_a[0])>FABS( tgp_b[0])))
- score-= (sequence_pattern[0][0]>0)?op:0;
- if ( FABS(tgp_b[0])>1 && (FABS(tgp_b[0])> FABS(tgp_a[0])))
- score-= (sequence_pattern[1][0]>0)?op:0;
- }
- else if ( MODE ==1 || MODE ==2)
- {
- if ( FABS(tgp_a[0])>1 && (FABS(tgp_a[0])>FABS( tgp_b[0])) && (tgp_a[1]!=1 || sequence_pattern[0][2]==0))
- score-= (sequence_pattern[0][0]>0)?op:0;
- if ( FABS(tgp_b[0])>1 && (FABS(tgp_b[0])> FABS(tgp_a[0])) && (tgp_b[1]!=1 || sequence_pattern[1][2]==0))
- score-= (sequence_pattern[1][0]>0)?op:0;
-
-
- if ( tgp_a[0]>=1 && tgp_a[0]==tgp_b[0])
- score -=(sequence_pattern[0][0]>0)?op:0;
- if ( tgp_b[0]>=1 && tgp_a[0]==tgp_b[0])
- score-= (sequence_pattern[1][0]>0)?op:0;
-
-
- if ( tgp_a[1]==1 && sequence_pattern[0][2]==0)
- score -= ( sequence_pattern[0][1]>0)?op:0;
- else if ( tgp_a[1]==1 && sequence_pattern[0][2]==1 && tgp_a[0]<=0)
- score -= ( sequence_pattern[0][1]>0)?op:0;
-
-
- if ( tgp_b[1]==1 && sequence_pattern[1][2]==0)
- score -= ( sequence_pattern[1][1]>0)?op:0;
- else if ( tgp_b[1]==1 && sequence_pattern[1][2]==1 && tgp_b[0]<=0)
- score -= ( sequence_pattern[1][1]>0)?op:0;
-
- if ( MODE==2)
- {
- if ( tgp_a[0]>0)
- score -=sequence_pattern[0][0]*ex;
- if ( tgp_b[0]>0)
- score -= sequence_pattern[1][0]*ex;
- if ( tgp_a[1]>0)
- score-=sequence_pattern[0][1]*ex;
- if ( tgp_b[1]>0)
- score-=sequence_pattern[1][1]*ex;
- }
- }
-
-
- return score;
-
-
-
- }
-void evaluate_tgp_decoded_chromosome ( Alignment *A,int **TGP,int start, int end,int MODE)
- {
- int a,b;
- int continue_loop;
-
-
-
- if (MODE==11 || MODE==13|| MODE==14)
- {
- if ( start==0)for ( a=0; a<A->nseq; a++)TGP[a][0]=-1;
- else for ( a=0; a<A->nseq; a++)TGP[a][0]=(is_gap(A->seq_al[a][start-1])==1)?0:1;
-
- if ( end==A->len_aln-1)for ( a=0; a<A->nseq; a++)TGP[a][1]=-1;
- else for ( a=0; a<A->nseq; a++)TGP[a][1]=(is_gap(A->seq_al[a][start-1])==1)?0:1;
- }
- else
- {
- /* 0: in the middle of the alignement
- 1: natural end
- 2: q left gap is the continuation of another gap that was open outside the bloc ( don't open it)
- */
-
- for ( a=0; a< A->nseq; a++)
- {
- TGP[a][0]=1;
- TGP[a][1]=1;
- for ( b=0; b< start; b++)
- if ( !is_gap(A->seq_al[a][b]))
- TGP[a][0]=0;
- if ( start>0 )
- {
- if (is_gap(A->seq_al[a][start-1]) && TGP[a][0]!=1)
- {TGP[a][0]=-1;
- continue_loop=1;
- for ( b=(start-1); b>=0 && continue_loop==1; b--)
- {TGP[a][0]-= ( is_gap(A->seq_al[a][b])==1)?1:0;
- continue_loop= (is_gap(A->seq_al[a][b])==1)?continue_loop:0;
- }
- }
- }
- else if (is_gap(A->seq_al[a][start-1]) && TGP[a][0]==1)
- {
- TGP[a][0]=1;
- continue_loop=1;
- for ( b=(start-1); b>=0 && continue_loop==1; b--)
- {TGP[a][0]+= ( is_gap(A->seq_al[a][b])==1)?1:0;
- continue_loop= (is_gap(A->seq_al[a][b])==1)?continue_loop:0;
- }
- }
- for ( b=(A->len_aln-1); b>end; b--)
- if ( !is_gap(A->seq_al[a][b]))
- TGP[a][1]=0;
- }
- }
- }
-int gap_type ( char a, char b)
- {
- /*gap_type -/-:0, X/X:-1 X/-:1, -/STAR:2*/
-
- if ( is_gap(a) && is_gap(b))
- return 0;
- else if ( !is_gap(a) && !is_gap(b))
- return -1;
- else if ( !is_gap(a))
- return 1;
- else if ( !is_gap(b))
- return 2;
- else
- return -1;
- }
-
-/******************************COPYRIGHT NOTICE*******************************/
-/*© Centro de Regulacio Genomica */
-/*and */
-/*Cedric Notredame */
-/*Fri Feb 18 08:27:45 CET 2011 - Revision 596. */
-/*All rights reserved.*/
-/*This file is part of T-COFFEE.*/
-/**/
-/* T-COFFEE is free software; you can redistribute it and/or modify*/
-/* it under the terms of the GNU General Public License as published by*/
-/* the Free Software Foundation; either version 2 of the License, or*/
-/* (at your option) any later version.*/
-/**/
-/* T-COFFEE is distributed in the hope that it will be useful,*/
-/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/
-/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/
-/* GNU General Public License for more details.*/
-/**/
-/* You should have received a copy of the GNU General Public License*/
-/* along with Foobar; if not, write to the Free Software*/
-/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/
-/*............................................... |*/
-/* If you need some more information*/
-/* cedric.notredame@europe.com*/
-/*............................................... |*/
-/**/
-/**/
-/* */
-/******************************COPYRIGHT NOTICE*******************************/