7 #include "io_lib_header.h"
8 #include "util_lib_header.h"
9 #include "define_header.h"
10 #include "dp_lib_header.h"
13 static double mc_delta_matrix ( int ***mat1, int ***mat2, char **alp, int nch);
14 static double delta_matrix ( int **mat1,int **mat2, char *alp);
15 static double ***pavie_seq2pavie_fmat (Sequence *S,double *gop, double *gep, char **mat, char *idmat, int id_threshold, int sample_size, int nch, char *param );
16 static int **pavie_fmat2pavie_logodd_mat (double **fmat, char *alp);
17 static double **pavie_aln2fmat(Alignment *A, double **fmat, char *idmat, int id_threshold, int ch, int nch, char *param);
18 static int pavie_mat2pavie_id_mat ( int **mat,char *in_name, char *alp, char *ignore, char *force,int T, char *out_name);
19 static double paviemat2gep ( int **mat, char *alp);
20 static Alignment *align_pavie_sequences (char *seq0,char *seq1,char **mat,double *gop,double *gep,int nch, char *param);
21 static int pavie_score (char *s0,int p0, char *s1,int p1,char **mat_file, double *gop, double *gep, int nch, float factor, char *param);
22 static char **seq2pavie_alp (Sequence *S, int nch);
23 static Sequence * seq2pavie_seq ( Sequence *S, int nch);
24 static FILE* output_pavie_aln (Alignment *A, int nch, FILE *fp);
25 static char **output_pavie_mat_list ( int ***current_mat, double *gep, char **alp, int nch,char *prefix,int cycle, char **mat_name);
26 static float pavie_aln2id ( Alignment *A, int mode);
27 static int check_pavie_cl ( char *string);
28 float pavie_aln2delta_age ( Alignment *A,int s0, int s1, int a0, int a1);
30 static float tgep_factor;
31 static int id_thres_used_aln;
32 static int log_odd_mode;
33 Sequence * pavie_seq2noisy_seq ( Sequence *S, int freq, char *alp)
42 x=seq2pavie_alp (S,1);
47 for (a=0; a< S->nseq; a++)
49 l1=strlen (S->seq[a]);
52 if ( (rand ()%100+1)<freq)
54 S->seq[a][b]=alp[rand()%l2];
59 Sequence * pavie_seq2random_seq ( Sequence *S, char *subst)
67 for ( a=0; a< S->nseq; a++)
68 for (b=0; b<S->len[a]; b++)
69 if ( S->seq[a][b]==r)S->seq[a][b]=subst[rand()%l];
73 double **pavie_seq2pavie_aln(Sequence *S,char *mat, char *mode)
87 check_pavie_cl (mode);
89 mat_list=declare_char (100, 100);
93 sprintf ( mat_list[nch++], "%s", mat);
98 while ( (c=fgetc(fp))!=EOF)
101 fscanf (fp, "%s\n",mat_list[nch++]);
106 alp=seq2pavie_alp (S, nch);
107 S=seq2pavie_seq (S, nch);
109 gop=vcalloc (nch, sizeof (double));
110 gep=vcalloc (nch, sizeof (double));
112 for ( a=0; a< nch; a++)
117 m=read_matrice (mat_list[a]);
118 if ((st=vstrstr(mode, "_GEP")))
120 sscanf ( st, "_GEP%d_", &v);
123 else if ( m[0][GAP_CODE]==0)
125 gep[a]=paviemat2gep(m,alp[a]);
129 gep[a]=m[0][GAP_CODE];
135 if ( (buf=vstrstr (mode, "_TGEPF")))
138 sscanf (buf, "_TGEPF%f_", &tgep_factor);
139 tgep_factor/=(float)100;
146 pavie_idmat=vtmpnam(NULL);
148 pavie_mat2pavie_id_mat (NULL,"idmat", alp[0],"X","",1,pavie_idmat);
149 dist_mat=declare_double ( S->nseq, S->nseq);
153 for ( a=0; a< S->nseq-1; a++)
155 for ( b=a+1; b< S->nseq; b++)
161 if ( ! strstr (mode, "_MSA_"))
163 A=align_pavie_sequences (S->seq[a],S->seq[b],mat_list,gop,gep,nch, mode);
164 sprintf ( A->name[0], "%s", S->name[a]);
165 sprintf ( A->name[1], "%s", S->name[b]);
170 A=strings2aln ( 2, S->name[a], S->seq[a], S->name[b], S->seq[b]);
171 sprintf ( A->seq_al[0], "%s", S->seq[a]);
172 sprintf ( A->seq_al[1], "%s", S->seq[b]);
173 A->len_aln=strlen (S->seq[a]);
177 if (strm (mode, "_ID01_"))A->score=score=pavie_aln2id (A, 1);
178 else if ( vstrstr (mode, "_ID02_"))A->score=score=pavie_aln2id (A, 2);
179 else if ( vstrstr (mode, "_ID04_"))A->score=score=pavie_aln2id (A, 4);
180 else if ( vstrstr (mode, "_ID05_"))A->score=score=pavie_aln2id (A, 5);
181 else if ( vstrstr (mode, "_ID06_"))A->score=score=pavie_aln2id (A, 6);
183 else A->score=score=pavie_aln2id (A, 1);
185 a0=S->seq[a][strlen(S->seq[a])+1];
186 a1=S->seq[b][strlen(S->seq[b])+1];
188 delta_a=pavie_aln2delta_age (A, 0, 1, a0, a1);
190 if ( vstrstr (mode, "_MATDIST_"))
191 dist_mat[a][b]=dist_mat[b][a]=(double)(vstrstr (mode, "_ID05") || vstrstr (mode, "_ID06"))?-score*100:(100-score);
192 else if ( vstrstr (mode, "_MATSIM_"))
193 dist_mat[a][b]=dist_mat[b][a]=(double)(score);
196 if ( !vstrstr (mode, "_MAT") )
198 fprintf ( stdout, "#############\nAlignment %s %s: %d %% ID SCORE %d DELTA_AGE %.2f\n", S->name[a], S->name[b], A->score, A->score_aln, delta_a);
199 output_pavie_aln (A,nch, stdout);
205 if ( vstrstr (mode, "_MAT") && !vstrstr ( mode, "_NOPRINT_"))
207 if ( vstrstr (mode, "_MFORMAT2"))
210 float *tot,s, bigtot=0;
212 for (max=0, a=0; a< S->nseq; a++)max=MAX(max,(strlen (S->name[a])));
213 tot=vcalloc ( S->nseq, sizeof (float));
214 fprintf (stdout, "# TC_DISTANCE_MATRIX_FORMAT_01\n");
215 for ( a=0; a<S->nseq; a++)
216 fprintf ( stdout, "# SEQ_INDEX %s %d\n",S->name[a],a);
217 fprintf ( stdout, "# PW_SEQ_DISTANCES \n");
218 for (n=0,a=0;a< S->nseq-1; a++)
220 for ( b=a+1; b<S->nseq; b++, n++)
224 fprintf (stdout, "BOT\t %4d %4d\t %5.2f %*s\t %*s\t %5.2f\n", a,b,s,max,S->name[a], max, S->name[b], s);
225 fprintf (stdout, "TOP\t %4d %4d\t %5.2f %*s\t %*s\t %5.2f\n", b,a,s,max,S->name[b], max, S->name[a], s);
231 for ( a=0; a< S->nseq; a++)
233 fprintf (stdout, "AVG\t %d\t %*s\t %*s\t %5.2f\n", a,max,S->name[a], max, "*", tot[a]/(S->nseq-1));
236 fprintf (stdout, "TOT\t %*s\t %*s\t %5.2f\n", max,"TOT", max, "*", bigtot/n);
241 for ( a=0; a<S->nseq; a++)
243 fprintf ( stdout, "\n%s ", S->name[a]);
244 for ( b=0; b< S->nseq; b++)
245 fprintf ( stdout, "%6d ", (int)(dist_mat[a][b]*100));
253 float pavie_aln2delta_age ( Alignment *A,int s0, int s1, int a0, int a1)
255 int a,r0, r1, g0, g1, n;
257 for (n=0, delta=0, a=0; a< A->len_aln; a++)
268 delta+=FABS((a0-a1));
272 delta/=(float)((n)?n:1);
276 int **pavie_seq2trained_pavie_mat(Sequence *S, char *param)
284 double d,delta_min=10;
290 char pavie_idmat[100];
297 check_pavie_cl (param);
299 if ( !param)param=vcalloc (1, sizeof (char));
301 if ((b=vstrstr(param,"_THR")))sscanf ( b, "_THR%d_", &id_threshold);
305 if ((b=vstrstr(param,"_SAMPLE")))sscanf ( b, "_SAMPLE%d_", &sample_size);
306 if ((b=vstrstr(param,"_PARALOGOUS")))
308 sscanf ( b, "_PARALOGOUS%d_", &sample_size);
312 if ((b=vstrstr(param,"_CHANNEL")))sscanf ( b, "_CHANNEL%d_", &nch);
315 if ( (buf=vstrstr (param, "_TGEPF")))
317 sscanf (buf, "_TGEPF%f_", &tgep_factor);
318 tgep_factor/=(float)100;
324 if ( (buf=vstrstr (param, "_PAMLOGODD_")))
329 gep=vcalloc (nch, sizeof (double));
330 gop=vcalloc (nch, sizeof (double));
331 mat_file=declare_char ( nch, 100);
332 current_mat =vcalloc ( nch, sizeof (double**));
333 previous_mat=vcalloc ( nch, sizeof (double**));
336 sprintf (ignore, "X");
338 sprintf ( pavie_idmat, "pavie_idmat");
342 alp=seq2pavie_alp (S, nch);
344 S=seq2pavie_seq (S, nch);
346 pavie_mat2pavie_id_mat (NULL,"idmat", alp[0],ignore,force,1,pavie_idmat);
348 for ( a=0; a<nch; a++)sprintf (mat_file[a], "idmat");
351 fmat=pavie_seq2pavie_fmat ( S,gop,gep,mat_file,pavie_idmat, id_threshold, sample_size, nch, param);
354 for (a=0; a<nch; a++)
356 current_mat[a]=pavie_fmat2pavie_logodd_mat(fmat[a], alp[a]);
357 gep[a]=paviemat2gep(current_mat[a], alp[a]);
359 free_arrayN((void*)fmat, 3);
361 mat_file=output_pavie_mat_list ( current_mat,gep, alp, nch,"", n++, mat_file);
365 fprintf ( stdout, "\n");
366 previous_mat=vcalloc ( nch, sizeof (int**));
367 while ((d=mc_delta_matrix (previous_mat, current_mat, alp, nch))>delta_min)
370 fprintf ( stdout, "\nDelta=%d: ",(int) d);
371 for (a=0; a<nch; a++)
373 free_int (previous_mat[a], -1);
374 previous_mat[a]=current_mat[a];
376 fprintf ( stdout, "\n");
378 fmat=pavie_seq2pavie_fmat (S,gop,gep,mat_file, pavie_idmat, id_threshold, sample_size, nch, param);
381 for (a=0; a< nch; a++)
383 current_mat[a]=pavie_fmat2pavie_logodd_mat(fmat[a], alp[a]);
384 gep[a]=paviemat2gep(current_mat[a], alp[a]);
387 mat_file=output_pavie_mat_list ( current_mat,gep, alp, nch,"", n, mat_file);
388 free_arrayN((void*)fmat, 3);
392 fprintf ( stdout, "\nDelta=%d Mat: ",(int) d);
393 for (a=0; a<nch; a++)
395 fprintf ( stdout, "%s ",mat_file[a]);
397 fprintf ( stdout, "\n");
399 return current_mat[0];
402 double mc_delta_matrix ( int ***mat1, int ***mat2, char **alp, int nch)
406 if ( !mat1 || !mat2) return 100000;
407 for ( a=0; a< nch; a++)
408 delta+=delta_matrix (mat1[a], mat2[a], alp[a]);
412 double delta_matrix ( int **mat1,int **mat2, char *alp)
418 if ( mat1==NULL || mat2==NULL) return 100000;
421 for (delta=0, a=0; a< ns; a++)
422 for ( b=0; b< ns; b++)
424 v=mat1[alp[a]-'A'][alp[b]-'A']-mat2[alp[a]-'A'][alp[b]-'A'];
432 double ***pavie_seq2pavie_fmat (Sequence *S,double *gop, double *gep, char **mat, char *idmat, int id_threshold, int sample_size, int nch, char *param )
442 fmat=vcalloc ( nch, sizeof (double **));
447 for (tot=0, a=0; a< S->nseq-exclude_id; a++)
450 output_completion ( stderr,a+1,S->nseq,1, "");
452 for ( b=a+exclude_id; b< S->nseq; b++)
456 A=align_pavie_sequences (S->seq[a],S->seq[b],mat,gop,gep,nch,param);
458 for ( chan=0; chan< nch; chan++)
461 fmat[chan]=pavie_aln2fmat (A, fmat[chan], idmat, id_threshold, chan, nch, param);
472 if ( sample_size>0 && !list)
474 if ( exclude_id==0)sample_size*=3;
477 list=declare_int ((sample_size+1), 2);
480 while (tot<sample_size)
482 a=rand()%(S->nseq);b=rand()%(S->nseq);
485 list[tot][0]=a;list[tot][1]=b;
489 list[tot][0]=a;list[tot][1]=a;
491 list[tot][0]=b;list[tot][1]=b;
498 else if ( sample_size<0 && !list)
503 sim=seq2sim_mat (S, "idmat");
505 list=declare_int (S->nseq*S->nseq, 2);
507 m=S->nseq-exclude_id;
509 for ( b=a+exclude_id; b<S->nseq; b++)
511 if ( sim[a][b]>sample_size)
516 fprintf ( stderr, "\n%s %s: %d", S->name[a], S->name[b], sim[a][b]);
517 fprintf ( stderr, "\nKeep %s Vs %s : %d%% ID", S->name[a], S->name[b], sim[a][b]);
523 for (c=0; c<tot; c++)
525 a=list[c][0];b=list[c][1];
526 A=align_pavie_sequences (S->seq[a],S->seq[b],mat,gop,gep,nch, param);
527 for (chan=0; chan< nch; chan++)
528 fmat[chan]=pavie_aln2fmat (A, fmat[chan], idmat, id_threshold,chan, nch, param);
531 output_completion ( stderr,c,tot,1, "");
534 fprintf ( stderr, "\n\tSample_size: %d Used alignments: %d\n", tot, id_thres_used_aln);
540 int **pavie_fmat2pavie_logodd_mat (double **fmat, char *alp)
551 mat=declare_int (256, 256);
553 for ( a=0; a<ns; a++)
556 fprintf ( stderr, "\n\tSymbol %c Freq %5.2f %%", s1, ((float)fmat[s1][0]*100)/(float)fmat[0][0]);
564 s1=tolower(alp[a]);S1=toupper(alp[a]);
565 s2=tolower(alp[b]);S2=toupper(alp[b]);
567 if ( log_odd_mode==0)
569 r1=(fmat[s1][s2]+1)/(fmat[s1][s1]+1);
570 r2=(fmat[s2][s1]+1)/(fmat[s2][s2]+1);
571 logodd=(int)(((log(r1)+log(r2))/2)*PAVIE_MAT_FACTOR);
573 else if ( log_odd_mode==1)
576 r1=(fmat[s1][s2]+fmat[s2][s1]+1)/(fmat[1][1]+1);
577 r2=((fmat[s1][1]+1)/(fmat[1][1]+1))*((fmat[s2][1]+1)/(fmat[1][1]+1))*2;
578 logodd=(int)(log(r1/r2)*PAVIE_MAT_FACTOR);
581 mat[s1-'A'][s2-'A']=logodd;
582 mat[S1-'A'][S2-'A']=logodd;
583 mat[S1-'A'][s2-'A']=logodd;
584 mat[s1-'A'][S2-'A']=logodd;
590 double **pavie_aln2fmat(Alignment *A, double **fmat, char *idmat, int id_threshold, int ch, int nch, char *param)
605 if ( fmat==NULL)fmat=declare_double(300, 300);
607 if ( vstrstr (param, "_TWE00_"))w=100;
608 else if ( vstrstr (param, "_TWE01_"))w=pavie_aln2id (A, 1);
609 else if ( vstrstr (param, "_TWE02_"))w=pavie_aln2id (A, 2);
610 else if ( vstrstr (param, "_TWE03_"))w=pavie_aln2id (A, 3);
611 else if ( vstrstr (param, "_TWE04_"))w=pavie_aln2id (A, 4);
612 else if ( vstrstr (param, "_TWE05_"))w=pavie_aln2id (A, 5);
613 else if ( vstrstr (param, "_TWE06_"))w=pavie_aln2id (A, 6);
615 else w=pavie_aln2id (A, 3);
617 id=pavie_aln2id(A, 3);
623 A->seq_al[0]-=start;A->seq_al[1]-=start;
629 for ( a=0; a<A->len_aln; a++)
631 c1=tolower(A->seq_al[0][a]);
632 c2=tolower(A->seq_al[1][a]);
645 A->seq_al[0]-=start;A->seq_al[1]-=start;
651 int pavie_mat2pavie_id_mat ( int **mat,char *in_name, char *alp, char *ignore, char *force,int T, char *out_name)
658 if (mat==NULL && in_name==NULL) return 0;
661 mat=read_matrice (in_name);
665 idmat=declare_int ( 256, 256);
670 for (a=0; a< n1; a++)
671 for ( b=0; b<n1; b++)
673 s1=tolower(alp[a])-'A';S1=toupper(alp[a])-'A';
674 s2=tolower(alp[b])-'A';S2=toupper(alp[b])-'A';
675 idmat[s1][s2]=idmat[s1][S2]=idmat[S1][S2]=idmat[S1][s2]=(mat[s1][s2]>=T)?PAVIE_MAT_FACTOR:0;
680 s1=tolower(force[a])-'A';S1=toupper(force[a])-'A';
681 s2=tolower(alp[b])-'A';S2=toupper(alp[b])-'A';
682 idmat[s1][s2]=idmat[s1][S2]=idmat[S1][S2]=idmat[S1][s2]=PAVIE_MAT_FACTOR;
688 s1=tolower(ignore[a])-'A';S1=toupper(ignore[a])-'A';
689 s2=tolower(alp[b])-'A';S2=toupper(alp[b])-'A';
690 idmat[s1][s2]=idmat[s1][S2]=idmat[S1][S2]=idmat[S1][s2]=0;
696 output_pavie_mat (idmat, out_name, 0, alp);
697 free_int (idmat, -1);
700 double paviemat2gep ( int **mat, char *alp)
707 for (a=0; a<l-1; a++)
708 for ( b=a+1; b< l; b++)
710 gep+=mat[alp[a]-'A'][alp[b]-'A'];
719 Alignment *align_pavie_sequences (char *seq0,char *seq1,char **mat,double *gop,double *gep,int nch, char *param)
726 double match, gap_inX, gap_inY, MXY=1, GX=2, GY=3;
729 char *bufx, *bufy, *buf;
737 terminal gaps are set to gep=gep*factor
740 if ( strm (seq0, seq1))
742 A=declare_aln2 (2,strlen(seq0)+1);
743 A->len_aln=strlen (seq0);
745 A->score=A->score_aln=100;
746 sprintf ( A->seq_al[0], "%s", seq1);
747 sprintf ( A->seq_al[1], "%s", seq0);
759 ax=vcalloc ( (YL+XL)*nch+1, sizeof (char));
760 ay=vcalloc ( (YL+XL)*nch+1, sizeof (char));
761 bufx=vcalloc ( (YL+XL)*nch+1, sizeof (char));
762 bufy=vcalloc ( (YL+XL)*nch+1, sizeof (char));
764 F=declare_double (XL+2, YL+2);
765 T=declare_int (XL+2, YL+2);
770 for(i = 1; i <=XL; i++)
773 F[i][0] = F[i-1][0]+pavie_score (x,i-1,NULL,GAP_CODE,mat, gop, gep, nch, factor, param) /*CL->M[x[i-1]-'A'][gap]*/;
778 for(j = 1; j <= YL; j++)
781 F[0][j] = F[0][j-1]+pavie_score (NULL,GAP_CODE,y,j-1,mat, gop, gep, nch, factor, param)/*CL->M[y[j-1]-'A'][gap]*/;
786 for(i = 1; i <= XL; i++)
788 for(j = 1; j <= YL; j++)
791 match = F[i-1][j-1] + /*CL->M[x[i-1]-'A'][y[j-1]-'A']*/pavie_score (x,i-1,y, j-1,mat, gop, gep, nch, 1, param);
792 gap_inY= F[i-1][j] + /*CL->M[x[i-1]-'A'][gap]*/ pavie_score (x,i-1, NULL,GAP_CODE,mat, gop, gep, nch, (j==YL)?factor:1, param);
793 gap_inX= F[i][j-1] + /*+ CL->M[y[j-1]-'A'][gap]*/ pavie_score (NULL,GAP_CODE,y, j-1,mat, gop, gep, nch, (i==XL)?factor:1, param);
795 if ( match >= gap_inY && match >=gap_inX){F[i][j]=match; T[i][j]=MXY;}
796 else if ( gap_inX>=gap_inY){F[i][j]=gap_inX; T[i][j]=GX;}
797 else {F[i][j]=gap_inY; T[i][j]=GY;}
806 while(!(i==0 && j==0))
814 else if ( T[i][j]==GY)
819 else if ( T[i][j]==GX)
827 for (a=0; a<len; a++)
828 for (b=1; b<nch; b++)
838 sprintf ( bufx, "%s", ax);
839 sprintf ( bufy, "%s", ay);
847 buf=ax;ax=invert_string (ax);vfree(buf);
848 buf=ay;ay=invert_string (ay);vfree(buf);
851 A=declare_aln2 (2,strlen(ax)+1);
854 A->len_aln=strlen (ax);
856 A->score=A->score_aln=F[XL][YL];
858 for (a=0, b=0, c=0; a<A->len_aln; a++)
860 if (ax[a]==1)ax[a]=seq0[b++];
861 if (ay[a]==1)ay[a]=seq1[c++];
866 sprintf ( A->seq_al[0], "%s", ax);
867 sprintf ( A->seq_al[1], "%s", ay);
869 vfree (ax); vfree(ay);vfree (bufx); vfree (bufy);free_double(F, -1); free_int (T, -1);
874 int pavie_score (char *s0,int p0, char *s1,int p1,char **mat_file, double *gop, double *gep, int nch, float factor, char *param)
880 static int mchscore=-1;
887 strget_param ( param, "_AGECHANNEL", "-1", "%d", &use_age);
892 strget_param (param, "_MCHSCORE", "0", "%d", &mchscore);
896 if ( !cmat || !mat_file || !strm (cmat, mat_file[0]))
898 if ( !cmat)cmat=vcalloc ( 100, sizeof (char));
899 sprintf ( cmat, "%s", (mat_file)?mat_file[0]:"idmat");
900 if ( !mat)mat=vcalloc ( nch, sizeof (int**));
901 for ( a=0; a< nch; a++)
903 if ( mat[a])free_int (mat[a], -1);
904 mat[a]=read_matrice ((mat_file)?mat_file[a]:"idmat");
909 l0=(s0)?strlen (s0)/nch:0;
910 l1=(s1)?strlen (s1)/nch:0;
913 else if (mchscore==1) score=999999;
914 else if (mchscore==2)score=-9999999;
917 HERE ("Error: mchscore >2 [FATAL]\n");
920 for ( a=0; a< nch; a++)
923 c0=(s0)?s0[l0*a+p0]-'A':p0;
924 c1=(s1)?s1[l1*a+p1]-'A':p1;
925 if ( c0==GAP_CODE)s=(gep[a]!=0)?gep[a]:mat[a][c1][GAP_CODE];
926 else if ( c1==GAP_CODE)s=(gep[a]!=0)?gep[a]:mat[a][c0][GAP_CODE];
927 else s=mat[a][c0][c1];
929 if (mchscore==0)score+=s;
930 else if (mchscore==1)score=MIN(s, score);
931 else if (mchscore==2)score=MAX(s, score);
936 if ( use_age>0 && s0 && s1)
947 s=use_age*FABS((a0-a1))*-1;
949 if (mchscore==0)score+=s;
950 else if (mchscore==1)score=MIN(s, score);
951 else if (mchscore==2)score=MAX(s, score);
958 Sequence * seq2pavie_seq ( Sequence *S, int nch)
965 for (b=0; b<S->nseq; b++)
968 buf=vcalloc ((strlen (S->seq[b])*nch)+10, sizeof (char));
969 for ( a=0; a< nch; a++)
971 strcat (buf, S->seq[b+(S->nseq)*a]);
972 vfree ( S->seq[b+(S->nseq)*a]);
975 /*Code Age on the byte just after the string termination*/
977 if ((p=strstr (S->seq_comment[b], "FIRSTYEAR")))
979 sscanf ( p, "FIRSTYEAR%d", (int*)&(S->seq[strlen(buf)+1]));
985 char **seq2pavie_alp (Sequence *S, int nch)
991 alp=vcalloc (nch, sizeof (char*));
992 for ( a=0; a< nch; a++)
994 alp[a]=array2alphabet (S->seq+n*a, n, "-.");
998 FILE *output_pavie_aln (Alignment *A, int nch, FILE *fp)
1000 int a, b, c,d, l, start, end;
1003 B=declare_aln2(A->nseq*nch+nch, A->len_aln);
1009 for ( a=0; a< nch; a++)
1011 for (b=0; b< A->nseq; b++, B->nseq++)
1013 sprintf (B->name[B->nseq], "%s.c%d", A->name[b], a);
1014 start=l*a;end=start+l;
1015 for (d=0,c=start; c<end; c++, d++)B->seq_al[B->nseq][d]=A->seq_al[b][c];
1016 B->seq_al[B->nseq][d]='\0';
1020 B->name[B->nseq][0]='\0';
1021 for ( b=0; b<l; b++)B->seq_al[B->nseq][b]='^';
1027 fp=output_Alignment_without_header (B,fp);
1029 free_sequence (S, S->nseq);
1033 char **output_pavie_mat_list ( int ***current_mat, double *gep, char **alp, int nch,char *prefix,int cycle, char **mat_name)
1036 char mat_list_name[100];
1042 sprintf ( mat_list_name, "pavie_matrix%s.cycle_%d.mat_list", prefix, cycle+1);
1043 fp=vfopen ( mat_list_name, "w");
1044 fprintf ( stderr, "\n\tOutput Pavie Matrix: %s", mat_list_name);
1045 for ( a=0; a< nch; a++)
1047 sprintf ( mat_name[a], "pavie_matrix%s.ch_%d.cy_%d.pavie_mat", prefix,a+1, cycle+1);
1048 sprintf (latest, "pavie_matrix%s.ch_%d.cy_last.pavie_mat",prefix,a+1);
1049 sprintf ( current, "matrix.ch%d.pavie_mat", a);
1050 fprintf ( stderr, "\n\t Channel %d Matrix: %s",a+1, mat_name[a]);
1051 output_pavie_mat (current_mat[a],mat_name[a],gep[a], alp[a]);
1052 sprintf ( command, "cp %s %s", mat_name[a], latest);
1054 sprintf ( command, "cp %s %s", latest, current);
1056 fprintf ( fp, "%s\n", mat_name[a]);
1063 int pavie_pair_wise (Alignment *A,int *ns, int **l_s,Constraint_list *CL )
1065 double **F; int **T;
1070 double match, gap_inX, gap_inY, MXY=1, GX=2, GY=3;
1077 decreases terminal gap penalties with a factor X
1078 factor=1: terminal gap penalties <=> internal gap penalties
1083 x=A->seq_al[l_s[0][0]];
1084 y=A->seq_al[l_s[1][0]];
1088 ax=vcalloc ( YL+XL+1, sizeof (char));
1089 ay=vcalloc ( YL+XL+1, sizeof (char));
1092 F=declare_double (XL+2, YL+2);
1093 T=declare_int (XL+2, YL+2);
1098 for(i = 1; i <=XL; i++)
1101 F[i][0] = F[i-1][0]+(CL->M[x[i-1]-'A'][gap]*factor);
1105 for(j = 1; j <= YL; j++)
1107 F[0][j] = F[0][j-1]+CL->M[y[j-1]-'A'][gap]*factor;
1112 for(i = 1; i <= XL; i++)
1114 for(j = 1; j <= YL; j++)
1117 match = F[i-1][j-1] + CL->M[x[i-1]-'A'][y[j-1]-'A'];
1118 gap_inY= F[i-1][j] + (CL->M[x[i-1]-'A'][gap]*(j==YL)?factor:1);
1119 gap_inX= F[i][j-1] + (CL->M[y[j-1]-'A'][gap]*(i==XL)?factor:1);
1121 if ( match >= gap_inY && match >=gap_inX){F[i][j]=match; T[i][j]=MXY;}
1122 else if ( gap_inX>=gap_inY){F[i][j]=gap_inX; T[i][j]=GX;}
1123 else {F[i][j]=gap_inY; T[i][j]=GY;}
1126 /*Trace back stage*/
1127 A->score=A->score_aln=F[XL][YL];
1132 while(!(i==0 && j==0))
1140 else if ( T[i][j]==GY)
1145 else if ( T[i][j]==GX)
1155 ix=invert_string (ax); iy=invert_string(ay);
1156 A=realloc_aln (A,len+1);
1158 sprintf ( A->seq_al[l_s[0][0]], "%s", ix);
1159 sprintf ( A->seq_al[l_s[1][0]], "%s", iy);
1163 vfree (ax); vfree(ay);vfree(ix); vfree(iy); free_double(F, -1); free_int (T, -1);
1167 float pavie_aln2id ( Alignment *A, int mode)
1169 int a, id=0, match=0, l1=0, l2=0, r1, r2, is_res1, is_res2;
1173 for (a=0; a<A->len_aln; a++)
1179 is_res1=(!is_gap(r1) && r1!='x' && r1!='X')?1:0;
1180 is_res2=(!is_gap(r2) && r2!='x' && r2!='X')?1:0;
1186 if ( is_res1 && is_res2 )
1194 if ( mode==1)return (match==0)?0:((id*100)/match);
1195 else if (mode ==2) return (A->len_aln==0)?0:((id*100)/A->len_aln);
1196 else if (mode ==3) return (MIN(l1,l2)==0)?0:((id*100)/(MIN(l1,l2)));
1197 else if (mode ==4) return (MAX(l1,l2)==0)?0:((id*100)/(MAX(l1,l2)));
1198 else if (mode ==5)return (A->score_aln * -1)/*/PAVIE_MAT_FACTOR*/;
1199 else if (mode ==6)return ((MAX(l1,l2)==0)?0:((A->score_aln)/(MAX(l1,l2))))*-1;
1202 fprintf ( stderr, "\nUnknown Mode [pavie_aln2id:FATAL:%s]", PROGRAM);
1203 myexit (EXIT_FAILURE);
1204 return EXIT_FAILURE;
1209 int check_pavie_cl ( char *string)
1211 if ( !string || string[0]=='\0' ) return 1;
1212 else if (( string[0]!='_') ||string [strlen (string)-1]!='_')
1214 fprintf ( stderr, "ERROR: parameters must start and finish with an underscore: _parameters_ [FATAL:%s]\n", PROGRAM);
1215 myexit (EXIT_FAILURE);
1220 Alignment *pavie_seq2pavie_sort ( Sequence *S, char *mat, char *mode)
1222 int a, b, c=0, avg_c;
1228 if ( vstrstr (mode, "_IDSORT_") || vstrstr (mode, "_MASTERSORT"))
1231 buf=vcat ( mode, "_MATDIST_NOPRINT_");
1232 dm=pavie_seq2pavie_aln (S, mat,buf);
1233 avg=declare_int (S->nseq, 2);
1234 if ( vstrstr (mode,"_IDSORT_"))
1237 for ( a=0; a< S->nseq; a++)
1240 for ( b=0; b<S->nseq; b++)
1241 if ( b!=a)avg[a][1]+=(int) dm[a][b];
1242 avg[a][1]/=S->nseq-1;
1244 sort_int ( avg, 2, 1, 0, S->nseq-1);
1248 fprintf ( stderr, "\nAVG\t %s\t %s\t %d",S->name[c],"avg", avg_c);
1250 else if ( vstrstr (mode, "_MASTERSORT"))
1254 s=vstrstr(mode, "_MASTERSORT");
1255 mode=substitute ( mode, "_", " ");
1256 sscanf (s, " MASTERSORT%s", name);
1258 mode=substitute (mode, " ", "_");
1259 c=name_is_in_list ( name, S->name, S->nseq, 100);
1263 fprintf ( stderr, "\nERROR: Sequence %s is not in the dataset [FATAL:%s]", name, PROGRAM);
1264 myexit (EXIT_FAILURE);
1268 for ( a=0; a<S->nseq; a++)
1271 if ( a!=c)avg[a][1]=dm[c][a];
1275 sort_int ( avg, 2, 1, 0, S->nseq-1);
1277 new_order=declare_char ( S->nseq, 100);
1278 sprintf (new_order[0], "%s", S->name[c]);
1279 for ( a=1; a<S->nseq; a++)
1281 sprintf ( new_order[a], "%s", S->name[avg[a][0]]);
1283 fprintf ( stderr, "\nTOP\t %s\t %s\t %d", S->name[c],new_order[a] , avg[a][1]);
1286 fprintf ( stderr, "\n");
1287 A=seq2aln (S, NULL,RM_GAP);
1288 A=reorder_aln (A, new_order, A->nseq);
1290 free_double(dm, -1);free_int (avg, -1);free_char (new_order, -1);
1292 else if ( vstrstr ( mode, "_TREESORT_"))
1294 A=pavie_seq2pavie_msa (S, mat, mode);
1298 fprintf ( stderr, "\nERROR: pavie_seq2sort <matrix> <_IDSORT_ | _TREESORT_>");
1303 NT_node pavie_seq2pavie_tree (Sequence *S, char *mat, char *mode)
1306 char *tree_name,*buf;
1309 buf=vcat (mode,"_MATDIST_NOPRINT_");
1310 dm=pavie_seq2pavie_aln (S, mat,buf);
1311 dist2nj_tree (dm,S->name, S->nseq,tree_name=vtmpnam (NULL));
1313 free_double(dm, -1);vfree (buf);
1315 return main_read_tree (tree_name);
1318 Alignment* pavie_seq2pavie_msa ( Sequence *S, char *mat_in, char *mode)
1320 Constraint_list *CL;
1328 A=seq2aln (S, NULL, RM_GAP);
1329 CL=declare_constraint_list (S, NULL, NULL, 0, NULL, NULL);
1330 sprintf ( CL->dp_mode, "myers_miller_pair_wise");
1331 sprintf ( CL->tree_mode, "nj");
1332 sprintf ( CL->distance_matrix_mode, "idscore");
1333 CL=choose_extension_mode ("matrix", CL);
1335 if ( !is_matrix (mat_in))
1338 fp=vfopen ( mat_in, "r");
1339 fscanf (fp, "%s", mat);
1341 add_warning( stderr, "\nWarning: Multiple Channel Not Supported. Used First Channel Only for MSA [Matrix: %s][WARNING:%s]", mat, PROGRAM);
1345 sprintf ( mat, "%s", mat_in);
1348 CL->M=read_matrice (mat);
1351 alp=seq2pavie_alp (S, 1);
1352 CL->gep=paviemat2gep(CL->M, alp[0]);
1355 CL->pw_parameters_set=1;
1356 CL->local_stderr=stderr;
1358 if ( vstrstr (mode, "_QUICKTREE_"))
1360 FT=make_tree (A, CL, CL->gop, CL->gep,S, NULL,MAXIMISE);
1363 else if ( (s=vstrstr (mode, "_USETREE")))
1366 mode=substitute ( mode, "_", " ");
1367 sscanf (s, " USETREE%s", fname);
1368 mode=substitute (mode, " ", "_");
1369 T=main_read_tree (fname);
1373 T=pavie_seq2pavie_tree ( S, mat_in, mode);
1376 for ( a=0; a< A->nseq; a++)ungap (A->seq_al[a]);
1378 tree_aln (T->left,T->right,A,(CL->S)->nseq, CL);
1379 A=reorder_aln ( A,A->tree_order,A->nseq);
1383 /*********************************COPYRIGHT NOTICE**********************************/
1384 /*© Centro de Regulacio Genomica */
1386 /*Cedric Notredame */
1387 /*Tue Oct 27 10:12:26 WEST 2009. */
1388 /*All rights reserved.*/
1389 /*This file is part of T-COFFEE.*/
1391 /* T-COFFEE is free software; you can redistribute it and/or modify*/
1392 /* it under the terms of the GNU General Public License as published by*/
1393 /* the Free Software Foundation; either version 2 of the License, or*/
1394 /* (at your option) any later version.*/
1396 /* T-COFFEE is distributed in the hope that it will be useful,*/
1397 /* but WITHOUT ANY WARRANTY; without even the implied warranty of*/
1398 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/
1399 /* GNU General Public License for more details.*/
1401 /* You should have received a copy of the GNU General Public License*/
1402 /* along with Foobar; if not, write to the Free Software*/
1403 /* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/
1404 /*............................................... |*/
1405 /* If you need some more information*/
1406 /* cedric.notredame@europe.com*/
1407 /*............................................... |*/
1411 /*********************************COPYRIGHT NOTICE**********************************/