+++ /dev/null
-#include <stdio.h>\r
-#include <stdlib.h>\r
-#include <math.h>\r
-#include <stdarg.h>\r
-#include <string.h>\r
-#include <ctype.h> \r
-#include "io_lib_header.h"\r
-#include "util_lib_header.h"\r
-#include "dp_lib_header.h"\r
-#include "define_header.h"\r
-\r
-int aln_has_stockholm_structure (Alignment *A)\r
-{\r
- return name_is_in_list ("#=GC SS_cons", A->name, A->nseq, 100);\r
-}\r
-\r
-int get_aln_stockholm_structure (Alignment *A)\r
-{\r
- int i;\r
- if ((i=aln_has_stockholm_structure(A))==-1)\r
- A=add_alifold2aln (A, NULL);\r
- return aln_has_stockholm_structure(A);\r
-}\r
-int ** update_RNAfold_list (Alignment *A, int **pos, int s, int **l)\r
-{\r
- int a=0;\r
- while (l[a])\r
- {\r
- if (!is_gap(A->seq_al[s][l[a][0]]) && !is_gap (A->seq_al[s][l[a][1]]))\r
- {\r
- l[a][2]=pos[s][l[a][0]];\r
- l[a][3]=pos[s][l[a][1]];\r
- }\r
- else\r
- {\r
- l[a][2]=l[a][3]=-1;\r
- }\r
- a++;\r
- }\r
- return l;\r
-}\r
-\r
-Alignment *compare_RNA_fold ( Alignment *A, Alignment *B)\r
-{\r
- int i1, i2, i;\r
- int **l1, **l2;\r
- int **pos1, **pos2;\r
- int a, b;\r
- int tot_ol=0, tot_l=0;\r
- \r
- i1=get_aln_stockholm_structure (A);\r
- i2=get_aln_stockholm_structure (B);\r
- \r
- l1=vienna2list (A->seq_al[i1]);\r
- l2=vienna2list (B->seq_al[i2]);\r
- \r
- pos1=aln2pos_simple(A, A->nseq);\r
- pos2=aln2pos_simple(B, B->nseq);\r
- \r
- \r
- \r
- for (a=0; a< A->nseq; a++)\r
- {\r
- char **lu;\r
- int ol=0, ll1=0, ll2=0;\r
- if ( A->name[a][0]=='#')continue;\r
- i=name_is_in_list (A->name[a], B->name, B->nseq, 100);\r
- if (i!=-1)\r
- {\r
- l1=update_RNAfold_list (A,pos1,a, l1);\r
- l2=update_RNAfold_list (B,pos2,i, l2);\r
- lu=declare_char (A->len_aln, B->len_aln);\r
- \r
- b=0;\r
- while (l2[b])\r
- {\r
- \r
- if (l2[b][2]==-1 || l2[b][3]==-1);\r
- else\r
- {\r
- ll2++;\r
- lu[l2[b][2]][l2[b][3]]=1;\r
- \r
- }\r
- b++;\r
- }\r
- b=0;\r
- \r
- while (l1[b])\r
- {\r
- \r
- if (l1[b][2]==-1 || l1[b][3]==-1);\r
- else\r
- {\r
- ll1++;\r
- if (lu[l1[b][2]][l1[b][3]]==1)\r
- {\r
- A->seq_al[a][l1[b][0]]='6';\r
- A->seq_al[a][l1[b][1]]='6';\r
- ol++;\r
- }\r
- else\r
- {\r
- A->seq_al[a][l1[b][0]]='0';\r
- A->seq_al[a][l1[b][1]]='0';\r
- }\r
- }\r
- b++;\r
- }\r
- \r
- free_char (lu, -1);\r
- }\r
- tot_ol+=ol;\r
- tot_l+=ll1;\r
- tot_l+=ll2;\r
- fprintf ( stdout, "@@ Seq: %s Overalp: %.2f Al1: %.2f Al2: %.2f \n", A->name[a], (float)(ol*200)/(ll1+ll2), (float)(ol*100)/ll1,(float)(ol*100)/ll2);\r
- }\r
- \r
- fprintf ( stdout, "@@ Seq: Tot Overalp: %.2f \n", (float)(tot_ol*200)/(tot_l));\r
- \r
- return A;\r
-}\r
int is_neutral(char c1, char c2);
int is_watson (char c1, char c2);
int is_watson2 (char c1, char c2);

/* Returns 1 when c1/c2 form a pair accepted by is_watson2() in either
 * order (case-insensitive), 0 otherwise.
 *
 * The characters are converted to unsigned char before tolower(): passing a
 * negative plain-char value to a <ctype.h> function is undefined behaviour.
 */
int is_watson (char c1, char c2)
{
  c1 = tolower ((unsigned char) c1);
  c2 = tolower ((unsigned char) c2);
  if (is_watson2 (c1, c2)) return 1;
  return is_watson2 (c2, c1);
}
/* Ordered, case-sensitive pair test: returns 1 for (g,c), (a,t) and (a,u),
 * 0 for everything else (including the reversed pairs).
 */
int is_watson2 (char c1, char c2)
{
  switch (c1)
    {
    case 'g':
      return (c2 == 'c') ? 1 : 0;
    case 'a':
      return (c2 == 't' || c2 == 'u') ? 1 : 0;
    default:
      return 0;
    }
}
/* Returns 1 when c1/c2 (case-insensitive) are accepted by is_watson(), or
 * form a g-t / g-u pair in either order; 0 otherwise.
 *
 * The characters are converted to unsigned char before tolower(): passing a
 * negative plain-char value to a <ctype.h> function is undefined behaviour.
 */
int is_neutral (char c1, char c2)
{
  c1 = tolower ((unsigned char) c1);
  c2 = tolower ((unsigned char) c2);

  if (is_watson (c1, c2)) return 1;
  if (c1 == 'g' && (c2 == 't' || c2 == 'u')) return 1;
  if ((c1 == 't' || c1 == 'u') && c2 == 'g') return 1;
  return 0;
}
-\r
/* Turns a bracket string into a NULL-terminated list of pairs.
 *
 * One entry is produced per '(' in seq, in order of appearance:
 *   entry[0] = index of the '('
 *   entry[1] = index where the forward scan stopped: the ')' that closes it,
 *              or the last index of seq when no closing ')' is found.
 * Entries are rows of 8 ints (declare_int); the other columns are left as
 * declare_int() returned them.
 */
int ** vienna2list ( char *seq)
{
  int len, open, scan, depth, npairs;
  int **pairs;

  len = strlen (seq);
  pairs = declare_int (len+1, 8);
  npairs = 0;

  for (open = 0; open < len; open++)
    {
      if (seq[open] != '(') continue;

      pairs[npairs][0] = open;

      /* depth drops below 0 on the ')' matching seq[open] */
      depth = 0;
      scan = open+1;
      while (scan < len && depth >= 0)
        {
          if (seq[scan] == '(') depth++;
          else if (seq[scan] == ')') depth--;
          scan++;
        }
      pairs[npairs][1] = scan-1;
      npairs++;
    }

  pairs[npairs] = NULL;
  return pairs;
}
-Alignment *aln2alifold(Alignment *A)\r
-{\r
- char *tmp1;\r
- char *tmp2;\r
- \r
- tmp1=vtmpnam (NULL);\r
- tmp2=vtmpnam (NULL);\r
- output_clustal_aln (tmp1,A);\r
- printf_system ("RNAalifold %s >%s 2>/dev/null", tmp1, tmp2);\r
- return alifold2aln (tmp2);\r
-}\r
-\r
-Alignment *add_alifold2aln (Alignment *A, Alignment *ST)\r
-{\r
- int a,b,c,d,p1,p2;\r
- int r1, rr1, r2, rr2;\r
- int watson, comp,tot; \r
- int **compmat;\r
- int max, p,k;\r
- int minseq=3;\r
- int **list;\r
- int ncomp=0, nwatson=0;\r
- int cons_l, fold_l;\r
- int i,l;\r
-\r
- if (!ST)\r
- {\r
- char *tmp1, *tmp2;\r
- int f;\r
- Alignment *T;\r
- T=copy_aln (A, NULL);\r
- tmp1=vtmpnam (NULL);\r
- tmp2=vtmpnam (NULL);\r
- cons_l=A->len_aln;\r
- for (a=0; a<A->len_aln; a++)\r
- {\r
- for (f=0,b=0; b<A->nseq && f==0; b++)\r
- {\r
- if (is_gap (A->seq_al[b][a]))f=1;\r
- \r
- }\r
- if (f)\r
- {\r
- cons_l--;\r
- for (b=0; b<A->nseq; b++)T->seq_al[b][a]='-';\r
- }\r
- }\r
- ST=aln2alifold (T);\r
- }\r
- \r
-\r
- //add or Replace the structure\r
- l=strlen (ST->seq_al[0]);\r
- \r
- if ( l!=A->len_aln)\r
- {\r
- HERE ("\n%s\n%s\n", ST->seq_al[0], A->seq_al[0]);\r
- printf_exit ( EXIT_FAILURE, stderr, "ERROR the predicted structure and the multiple alignment do not have the same length [FATAL:%s]\n", PROGRAM);\r
- \r
- }\r
-\r
- for (a=0; a< l; a++)if (ST->seq_al[0][a]==STOCKHOLM_CHAR)ST->seq_al[0][a]='.';\r
- if ((i=name_is_in_list ("#=GC SS_cons", A->name, A->nseq, 100))!=-1)\r
- {\r
- sprintf (A->seq_al[i], "%s", ST->seq_al[0]);\r
- }\r
- else\r
- {\r
- A=realloc_aln2 ( A, A->nseq+1, A->len_aln+1);\r
- sprintf (A->name[A->nseq], "#=GC SS_cons");\r
- sprintf (A->seq_al[A->nseq], "%s", ST->seq_al[0]);\r
- A->nseq++;\r
- }\r
- return A;\r
-}\r
-Alignment * alifold2analyze (Alignment *A, Alignment *ST, char *mode)\r
-{\r
- int s;\r
- int **list;\r
- int usegap;\r
- \r
- s=name_is_in_list ("#=GC SS_cons", A->name,A->nseq, 100);\r
- \r
- if (s==-1)\r
- {\r
- A=add_alifold2aln (A,ST);\r
- s=name_is_in_list ("#=GC SS_cons", A->name,A->nseq, 100);\r
- }\r
- \r
- list=vienna2list (A->seq_al[s]);\r
- list=alifold_list2cov_list (A, list);\r
- \r
- usegap=0; //do not use gaped positions by default\r
- if (mode && strstr (mode, "usegap"))usegap=1;//count positions with gaps\r
- \r
- if (!mode)\r
- {\r
- A=alifold2cov_stat (A, list,usegap);\r
- }\r
- else\r
- {\r
- if ( strstr (mode, "stat")) A=alifold2cov_stat (A, list, usegap);\r
- if ( strstr (mode, "list")) A=alifold2cov_list (A, list, usegap);\r
- if ( strstr (mode, "aln")) A=alifold2cov_aln (A, list, usegap);\r
- if ( strstr (mode, "color") )\r
- {\r
- Alignment *C;\r
- C=copy_aln (A, NULL);\r
- C=alifold2cov_cache (C, list, usegap);\r
- A=alifold2cov_aln (A, list, usegap);\r
- if ( strstr ( mode, "ps"))\r
- output_color_ps (A, C, "stdout");\r
- else\r
- output_color_html (A, C, "stdout");\r
- exit (EXIT_SUCCESS);\r
- }\r
- }\r
- return A;\r
-}\r
-\r
-\r
-int ** alifold_list2cov_list (Alignment *A, int **list)\r
-{\r
- int a,b,c,d,p1,p2,s;\r
- int r1, rr1, r2, rr2;\r
- int neutral,watson, comp,tot, occupancy; \r
- int **compmat;\r
- int max, p,k;\r
- int minseq=3;\r
- \r
- int ncomp=0, nwatson=0, nneutral=0, ncomp_wc=0;\r
- int cons_l, fold_l;\r
- int nseq;\r
- \r
- \r
- \r
- for (nseq=0,a=0; a< A->nseq; a++)if ( A->name[a][0]!='#')nseq++;\r
- max=((nseq*(nseq-1))/2);\r
- a=0;\r
- while (list[a])\r
- {\r
- p1=list[a][0];\r
- p2=list[a][1];\r
- watson=0;\r
- comp=0;\r
- neutral=0;\r
- tot=0;\r
- occupancy=0;\r
- for (c=0; c<A->nseq-1; c++)\r
- {\r
- if (A->name[c][0]=='#')continue;\r
- r1=tolower(A->seq_al[c][p1]);\r
- r2=tolower(A->seq_al[c][p2]); \r
- if (is_gap(r1) || is_gap(r2))continue;\r
- for (d=c+1; d<A->nseq; d++)\r
- {\r
- if (A->name[d][0]=='#')continue;\r
- rr1=tolower(A->seq_al[d][p1]);\r
- rr2=tolower(A->seq_al[d][p2]);\r
- if (is_gap(rr1) || is_gap(rr2))continue;\r
- if (is_watson (r1, r2))watson++;\r
- if (is_watson (rr1, rr2))watson++;\r
- if (is_neutral (r1, r2))neutral++;\r
- if (is_neutral (rr1, rr2))neutral++;\r
- if (r1!=rr1 && r2!=rr2)comp++;\r
- occupancy++;\r
- }\r
- \r
- }\r
- watson=(watson*100)/(occupancy*2);\r
- comp=(comp*100)/occupancy;\r
- neutral=(neutral*100)/(occupancy*2);\r
- occupancy=(occupancy*100)/max;\r
- list[a][3]=neutral;\r
- list[a][4]=watson;\r
- list[a][5]=comp;\r
- list[a][6]=occupancy;\r
- \r
- if (list[a][3]<100)\r
- {\r
- if (list[a][5]>0)list[a][7]='I';//compensated incompatible pair\r
- else list[a][7]='i'; //non compensated incompatible pair\r
- }\r
- else\r
- {\r
- list[a][7]='N';//Neutral pair\r
- if (list[a][4]==100)\r
- {\r
- list[a][7]='W';//Watson and Crick\r
- if ( list[a][5]>0)list[a][7]='C'; //Watson and crick compensated\r
- }\r
- else if ( list[a][5]>0)\r
- {\r
- list[a][7]='c';//compensated\r
- }\r
- }\r
- a++;\r
- }\r
- \r
- return list;\r
-}\r
-Alignment *alifold2cov_aln (Alignment *inA,int **list, int ug)\r
-{\r
- int a=0;\r
- a=0;\r
- Alignment *A;\r
-\r
- A=copy_aln (inA, NULL);\r
- A=realloc_aln2 ( A, A->nseq+1, A->len_aln+1);\r
- sprintf (A->name[A->nseq], "#=GC SS_analyze");\r
- sprintf (A->seq_al[A->nseq], "%s", A->seq_al[A->nseq-1]);\r
- A->nseq++;\r
- while (list[a])\r
- {\r
- char s;\r
- if (list[a][6]<100 && !ug);\r
- else\r
- {\r
- s=list[a][7];\r
- A->seq_al[A->nseq-1][list[a][0]]=s;\r
- A->seq_al[A->nseq-1][list[a][1]]=s;\r
- }\r
- a++;\r
- }\r
- return A;\r
-}\r
-Alignment *alifold2cov_stat (Alignment *A,int **list, int ug)\r
-{\r
- int fold=0,watson=0, comp=0, compwc=0, incomp=0, neutral=0;\r
- int a;\r
- \r
- a=0;\r
- while (list[a])\r
- {\r
- int s;\r
- fold++;\r
- if (list[a][6]<100 && !ug);\r
- else\r
- {\r
- s=list[a][7];\r
- watson +=(s=='W')?1:0;\r
- compwc +=(s=='C')?1:0;\r
- comp +=(s=='c')?1:0;\r
- neutral+=(s=='N')?1:0;\r
- incomp +=(s=='I')?1:0;\r
- }\r
- a++;\r
- }\r
- fprintf ( stdout, "@@ TOT Nseq:%d tot_len: %d fold: %d neutral: %d watson: %d CorWC: %d cor: %d CorIncompatible: %d\n",A->nseq-1, A->len_aln,fold, neutral,watson, compwc,comp,incomp);\r
- return A;\r
-}\r
-Alignment *alifold2cov_cache (Alignment *inA, int **list, int ug)\r
-{\r
- int a,b, c;\r
- Alignment *A;\r
-\r
- A=copy_aln (inA, NULL);\r
- a=0;\r
- while (list[a])\r
- {\r
- int v, s;\r
- if (list[a][6]<100 && !ug);\r
- else\r
- {\r
- s=list[a][7];\r
- if (s=='C')v=9; //red\r
- else if ( s=='c')v=7; //orange\r
- else if ( s=='W')v=5; //Yellow\r
- else if ( s=='N')v=2; //green\r
- else if ( s=='I')v=0; //blue;\r
- for (b=0;b<A->nseq; b++)\r
- {\r
- if (A->name[b][0]=='#');\r
- else\r
- {\r
- for (c=0; c<2; c++)\r
- {\r
- A->seq_al[b][list[a][c]]='0'+v;\r
- }\r
- }\r
- }\r
- }\r
- a++;\r
- }\r
- return A;\r
-}\r
-\r
-Alignment *alifold2cov_list (Alignment *A,int **list, int ug)\r
-{\r
- int a,b, s;\r
- \r
- a=0;\r
- while (list[a])\r
- {\r
- s=list[a][7];\r
- if (list[a][6]<100 && !ug);\r
- else if (s=='C')\r
- {\r
- fprintf ( stdout, "@@ WC Compensated pair: %4d %4d =>", list[a][0]+1, list [a][1]+1);\r
- for (b=0; b<A->nseq; b++)if (A->name[b][0]!='#')fprintf ( stdout, "[%c%c]", toupper (A->seq_al[b][list[a][0]]), toupper(A->seq_al[b][list[a][1]]));\r
- fprintf (stdout,"\n");\r
- }\r
- else if (s=='c')\r
- {\r
- fprintf ( stdout, "@@ Neural Compensated pair: %4d %4d =>", list[a][0]+1, list [a][1]+1);\r
- for (b=0; b<A->nseq; b++)if (A->name[b][0]!='#')fprintf ( stdout, "[%c%c]", toupper (A->seq_al[b][list[a][0]]), toupper(A->seq_al[b][list[a][1]]));\r
- fprintf (stdout,"\n");\r
- }\r
- else if (s=='W')\r
- {\r
- fprintf ( stdout, "@@ WC pair: %4d %4d =>", list[a][0]+1, list [a][1]+1);\r
- for (b=0; b<A->nseq; b++)if (A->name[b][0]!='#')fprintf ( stdout, "[%c%c]", toupper (A->seq_al[b][list[a][0]]), toupper(A->seq_al[b][list[a][1]]));\r
- fprintf (stdout,"\n");\r
- }\r
- else if (s=='N')\r
- {\r
- fprintf ( stdout, "@@ Neutral pair: %4d %4d =>", list[a][0]+1, list [a][1]+1);\r
- for (b=0; b<A->nseq; b++)if (A->name[b][0]!='#')fprintf ( stdout, "[%c%c]", toupper (A->seq_al[b][list[a][0]]), toupper(A->seq_al[b][list[a][1]]));\r
- fprintf (stdout,"\n");\r
- }\r
- else if (s=='I')\r
- {\r
- fprintf ( stdout, "@@ incompatible pair: %4d %4d =>", list[a][0]+1, list [a][1]+1);\r
- for (b=0; b<A->nseq; b++)if (A->name[b][0]!='#')fprintf ( stdout, "[%c%c]", toupper (A->seq_al[b][list[a][0]]), toupper(A->seq_al[b][list[a][1]]));\r
- fprintf (stdout,"\n");\r
- }\r
- a++;\r
- }\r
- \r
- return A;\r
-}\r
-\r
- \r
-Alignment *aln2sample (Alignment *A, int n)\r
-{\r
- Alignment *B;\r
- int a, b, p;\r
- int **pos;\r
- \r
- B=copy_aln (A, NULL);\r
- \r
- vsrand(0);\r
- \r
- pos=declare_int (A->len_aln, 2);\r
- for (a=0; a<A->len_aln; a++){pos[a][0]=a;pos[a][1]=rand()%(1000*A->len_aln);}\r
- \r
- sort_int (pos, 2, 1, 0, A->len_aln-1);\r
- \r
- n=(n==0)?A->len_aln:(MIN (n, (A->len_aln)));\r
- for (a=0; a<n; a++)\r
- for (b=0; b<A->nseq; b++)\r
- A->seq_al[b][a]=B->seq_al[b][pos[a][0]];\r
- for (b=0; b<A->nseq; b++)\r
- A->seq_al[b][n]='\0';\r
- A->len_aln=n;\r
- \r
- free_aln (B);\r
- free_int (pos, -1);\r
- return A;\r
-}\r
-Alignment *aln2bootstrap (Alignment *A, int n)\r
-{\r
- Alignment *B;\r
- int a, b, p;\r
- \r
- if (n==0)n=A->len_aln;\r
- else A=realloc_aln (A, n+1);\r
- vsrand(0);\r
- B=copy_aln (A, NULL);\r
- for (a=0; a<n; a++)\r
- {\r
- p=rand ()%A->len_aln;\r
- for (b=0; b<A->nseq; b++)\r
- A->seq_al[b][a]=B->seq_al[b][p];\r
- }\r
- for ( b=0; b<A->nseq; b++)A->seq_al[b][n]='\0';\r
- A->len_aln=n;\r
- \r
- free_aln (B);\r
- return A;\r
-\r
-}\r
- \r
-\r
-Alignment * aln2random_aln (Alignment *A, char *smode)\r
-\r
-{\r
- int a, b, n, **res;\r
- int max;\r
- \r
- \r
- \r
- if ( smode==NULL)\r
- {\r
- smode=vcalloc (4, sizeof (char));\r
- sprintf ( smode, "SCR");//Sequences, Column Residues\r
- }\r
- else if ( strm (smode, "NO"))return A;\r
- \r
- vsrand(0);\r
- max=A->nseq*1000;\r
- \r
- if ( strstr ( smode, "S"))\r
- {\r
- A=aln2scramble_seq (A);\r
- }\r
- if ( strstr ( smode, "C"))\r
- {\r
- \r
- res=declare_int (A->nseq, 2);\r
- for (a=0; a< A->len_aln; a++)\r
- {\r
- for (n=0,b=0;b<A->nseq; b++)\r
- {\r
- if ( !is_gap(A->seq_al[b][a]))\r
- {\r
- res[n][0]=A->seq_al[b][a];\r
- res[n][1]=rand()%max;\r
- n++;\r
- }\r
- sort_int (res, 2, 1, 0, n-1);\r
- }\r
- for (n=0,b=0;b<A->nseq; b++)\r
- {\r
- if ( !is_gap(A->seq_al[b][a]))A->seq_al[b][a]=res[n++][0];\r
- }\r
- }\r
- free_int (res, -a);\r
- }\r
- \r
- \r
- //Redistributes the residues randomly without changing the gap pattern\r
- if ( strstr ( smode, "R"))\r
- {\r
- max=A->len_aln*A->nseq;\r
- res=declare_int (max, 2);\r
- \r
- for (n=0,a=0; a< A->len_aln; a++)\r
- {\r
- for (b=0;b<A->nseq; b++)\r
- {\r
- if ( !is_gap(A->seq_al[b][a]))\r
- {\r
- res[n][0]=A->seq_al[b][a];\r
- res[n][1]=rand()%max;\r
- n++;\r
- }\r
- \r
- }\r
- }\r
- sort_int (res, 2, 1, 0, n-1);\r
- for (n=0,a=0; a< A->len_aln; a++)\r
- {\r
- for (b=0;b<A->nseq; b++)\r
- {\r
- if ( !is_gap(A->seq_al[b][a]))\r
- {\r
- A->seq_al[b][a]=res[n++][0];\r
- }\r
- \r
- }\r
- }\r
- \r
- free_int (res, -1);\r
- }\r
-\r
- return A;\r
-}\r
-Alignment *score_aln2score_ascii_aln (Alignment *A, Alignment *C)\r
-{\r
- //Convert the output of T-Coffee evaluate into a printable score_ascii alignment*/\r
- //A and C must be sorted \r
- //sets to 0 lone residues\r
- int a, b;\r
- \r
- for (a=0; a<A->nseq; a++)\r
- for (b=0; b<A->len_aln; b++)\r
- {\r
- \r
- int rC=C->seq_al[a][b];\r
- int rA=A->seq_al[a][b];\r
- if ( !strm (A->name[a], C->name[a])){HERE ("Unsorted aln in score_aln2score_ascii"); exit (EXIT_FAILURE);}\r
- \r
- if ( rA=='x' || rA=='X')C->seq_al[a][b]='9';\r
- else if ( rC >='0' && rC<='9');\r
- else if ( rC<10)C->seq_al[a][b]='0'+rC;\r
- else if ( rC==NO_COLOR_RESIDUE && !is_gap(rA)) C->seq_al[a][b]='0';\r
- else if ( rC==NO_COLOR_RESIDUE && is_gap(rA))C->seq_al[a][b]='-';\r
- }\r
- return C;\r
-}\r
-Alignment*aln2gap_cache (Alignment *A, int val)\r
-{\r
- Alignment *B;\r
- int a, b, c, nr;\r
- \r
- B=copy_aln (A, NULL);\r
- for (b=0; b<A->len_aln; b++)\r
- {\r
- for (nr=0,a=0; a<A->nseq; a++)nr+=!is_gap (A->seq_al[a][b]);\r
- for (a=0; a<A->nseq; a++)if (!is_gap(A->seq_al[a][b]))B->seq_al[a][b]=(nr==1)?'0'+val:'1';\r
- }\r
- return B;\r
-}\r
- \r
-Alignment* aln2case_aln (Alignment *B, char *upper, char *lower)\r
-{\r
- int a, b, c, up, lo;\r
- Alignment *A;\r
-\r
- A=copy_aln (B, NULL);\r
- \r
- up=(upper)?upper[0]:'u';\r
- lo=(lower)?lower[0]:'l';\r
- \r
- for (a=0; a<A->nseq; a++)\r
- for (b=0; b<A->len_aln; b++)\r
- {\r
- c=A->seq_al[a][b];\r
- \r
- if ( is_gap(c));\r
- else A->seq_al[a][b]=(isupper (c))?up:lo;\r
- }\r
- return A;\r
-}\r
-Alignment *aln2scale (Alignment *A, char *coffset)\r
-{\r
- int a, b, t, v, n;\r
- char *s1, *s2;\r
- char s[1000];\r
- int offset;\r
- \r
- if (coffset)offset=atoi(coffset);\r
- else offset=0;\r
-\r
- sprintf (s, "%d", A->len_aln+offset);\r
- n=strlen (s);\r
- \r
- A=realloc_aln2 (A, A->nseq+n, A->len_aln+1);\r
- s1=vcalloc ( n+1, sizeof (char));\r
- s2=vcalloc ( n+1, sizeof (char));\r
- \r
- for (a=0; a<n; a++)\r
- {\r
- if (a==0)s2[a]='1';\r
- else strcat (s2, "0");\r
- sprintf (A->name[A->nseq+a], "%s", s2);\r
- }\r
- \r
- for (a=0; a<A->len_aln; a++)\r
- {\r
- sprintf (s1, "%d", a+1+offset);\r
- s2=invert_string (s1);\r
- t=strlen (s2);\r
- \r
- for (b=0; b<=n; b++)\r
- {\r
- if (b>=t) v='0';\r
- else v=s2[b];\r
-\r
- A->seq_al[A->nseq+b][a]=v;\r
- }\r
- }\r
- \r
- A->nseq+=n;\r
- return A;\r
-}\r
-\r
- \r
- \r
- \r
/* Returns a newly allocated array (len ints) holding, in increasing order,
 * the indices i of pos for which pos[i] is non-zero. The number of indices
 * stored is written to nl[0].
 */
int * pos2list (int * pos, int len, int *nl)
{
  int *list;
  int i, count;

  list=vcalloc (len, sizeof (int));
  count=0;
  for (i=0; i<len; i++)
    {
      if (pos[i])
        {
          list[count]=i;
          count++;
        }
    }
  nl[0]=count;
  return list;
}
/* Returns a newly allocated array of len ints (from vcalloc) in which
 * pos[list[k]] is set to 1 for each of the nl entries of list.
 */
int *list2pos (int *list, int nl, int len)
{
  int *pos;
  int k;

  pos=vcalloc (len, sizeof (int));
  for (k=0; k<nl; k++)
    {
      pos[list[k]]=1;
    }
  return pos;
}
-\r
-int **aln2resindex ( Alignment *A, Alignment *B, FILE *fp)\r
-{\r
- int *list, **pos;\r
- int a, b, n, s;\r
- \r
- \r
- list=vcalloc (A->nseq+((B)?B->nseq:0), sizeof (int));\r
- pos=aln2pos_simple_2 (A);\r
- if (B)\r
- {\r
- n=B->nseq;\r
- for ( a=0; a<B->nseq; a++)\r
- {\r
- list[a]=name_is_in_list(B->name[a], A->name, A->nseq, 100);\r
- }\r
- }\r
- else\r
- {\r
- for ( a=0; a<A->nseq; a++)\r
- list[a]=a;\r
- n=A->nseq;\r
- }\r
-\r
- \r
- fprintf ( fp, "#");\r
- for ( b=0; b<n; b++)\r
- {\r
- s=list[b];\r
- if ( s!=-1)fprintf (fp, " %s",A->name[s]);\r
- }\r
- fprintf (fp, "\n");\r
- \r
- for ( a=0; a<A->len_aln; a++)\r
- {\r
- for ( b=0; b<n; b++)\r
- {\r
- s=list[b];\r
- if ( s==-1);\r
- else if (pos[s][a]<0)\r
- fprintf (fp, "%4d", -1);\r
- else\r
- fprintf (fp, "%4d", pos[s][a]);\r
- }\r
- fprintf (fp, "\n");\r
- }\r
- return pos;\r
-}\r
- \r
-int **index_seq_res ( Sequence *S1, Sequence *S2, int **name_index)\r
-{\r
- /*Index the residues of S1 according to S2\r
- index[seq1 of S1][z]->x, where x is the position of residue z of seq1/S1 in S2->seq[index[Seq1/S1]] \r
- */ \r
- int a;\r
- int **index;\r
- char *seq1=NULL, *seq2=NULL;\r
- Alignment *Profile;\r
- \r
- index=vcalloc ( S1->nseq, sizeof (int*));\r
- \r
- for (a=0; a< S1->nseq; a++)\r
- {\r
- int len1, len2, b, c;\r
- \r
- seq1=S1->seq[a];\r
- \r
- if (name_index[a][0]==-1)\r
- seq2=NULL;\r
- else if (name_index[a][1]==-1)\r
- {\r
- seq2=S2->seq[name_index[a][0]];\r
- }\r
- else if ((Profile=seq2R_template_profile (S2, name_index[a][0])) !=NULL)\r
- {\r
- seq2=Profile->seq_al[name_index[a][1]];\r
- }\r
- \r
- len1=strlen (seq1);len2=strlen (seq2);\r
- index[a]=vcalloc (len2, sizeof(int));\r
-\r
- \r
- for (c=0,b=0; b<len2; b++)if( !is_gap(seq2[b]))index[a][c++]=b;\r
- //index[a]=get_res_index ( seq1, seq2);\r
- }\r
- return index;\r
-}\r
-\r
-int **index_seq_name ( Sequence *S1, Sequence *S2)\r
-{\r
- /*Index the names of S1 according to S2\r
- index[seq1 of S1][0]->x if seq1 is the xth sequence of S2\r
- ->-1 if seq1 is nowhere to be found\r
- index[seq1 of S1][1]->z if seq1 is the zth sequence within the xth profile of S2\r
- */\r
- int **index;\r
- int a, b, x, z;\r
- Alignment *Profile;\r
- index=declare_int (S1->nseq, 2);\r
-\r
- \r
- for ( a=0; a<S1->nseq; a++)\r
- {\r
- index[a][0]=index[a][1]=-1;\r
- x=name_is_in_list (S1->name[a],S2->name,S2->nseq,100);\r
- if ( x!=-1){index[a][0]=x;index[a][1]=-1;}\r
- for ( b=0; b<S2->nseq; b++)\r
- {\r
- if ((Profile=seq2R_template_profile (S2,b)))\r
- {\r
- z=name_is_in_list (S1->name[a],Profile->name,Profile->nseq,100);\r
- if ( z!=-1){index[a][0]=b;index[a][1]=z;b=S2->nseq;}\r
- }\r
- }\r
- }\r
- return index;\r
-}\r
- \r
- \r
- \r
- \r
/* Returns a newly allocated array r of n1 ints where r[i] is the result of
 * looking up l1[i] in the n2 names of l2 (name_is_in_list), i.e.
 * Array[Index_L1]=Index_L2.
 */
int *get_name_index (char **l1, int n1, char **l2, int n2)
{
  int *map;
  int i;

  map=vcalloc ( n1, sizeof (int));
  for (i=0; i<n1; i++)
    {
      map[i]=name_is_in_list (l1[i],l2,n2,100);
    }
  return map;
}
-\r
-int* get_res_index (char *seq0, char *seq1)\r
-{\r
- int *coor, a;\r
-\r
- if ( !seq0 || !seq1) return NULL;\r
- \r
-\r
- coor=vcalloc ( strlen (seq0)+1, sizeof (int));\r
- if (!strm (seq0, seq1))\r
- {\r
- int r0, r1 , isr0, isr1;\r
- int l0=0, l1=0;\r
- Alignment *A;\r
- A=align_two_sequences (seq0,seq1,"pam250mt",-5,-1, "myers_miller_pair_wise");\r
- \r
- for ( a=0; a< A->len_aln; a++)\r
- {\r
- r0=A->seq_al[0][a];r1=A->seq_al[1][a];\r
- isr0=!is_gap(r0);\r
- isr1=!is_gap(r1);\r
- l0+= isr0;\r
- l1+= isr1;\r
- if (isr0 && isr1)coor[l0-1]=l1-1;\r
- else if (isr0) coor[l0-1]=-1;\r
- }\r
- free_aln (A);\r
- }\r
- else\r
- {\r
- int l0;\r
- \r
- l0=strlen (seq0);\r
- for ( a=0;a< l0; a++)\r
- coor[a]=a;\r
- }\r
- \r
- return coor;\r
-}\r
-\r
-int change_residue_coordinate ( char *in_seq1, char *in_seq2, int v)\r
-{\r
- /*Expresses the coordinate of a residue in seq1, in the coordinate system of seq2*/\r
-\r
- \r
- static char *seq1, *seq2;\r
- static int *coor;\r
- \r
-\r
- if ( seq1 !=in_seq1 || seq2 !=in_seq2)\r
- {\r
- int r0, r1 , isr0, isr1;\r
- int l0=0, l1=0;\r
- Alignment *A;\r
- int a;\r
- \r
- vfree (coor);\r
- seq1=in_seq1, seq2=in_seq2;\r
- A=align_two_sequences (seq1,seq2,"pam250mt", -14, -2, "myers_miller_pair_wise");\r
- \r
- coor=vcalloc ( A->len_aln, sizeof (int));\r
- for ( a=0; a< A->len_aln; a++)\r
- {\r
- r0=A->seq_al[0][a];r1=A->seq_al[1][a];\r
- \r
- isr0=!is_gap(r0);\r
- isr1=!is_gap(r1);\r
- l0+= isr0;\r
- l1+= isr1;\r
-\r
- if (isr0 && isr1)coor[l0-1]=l1-1;\r
- else if (isr0) coor[l0-1]=-1;\r
- }\r
- free_aln (A);\r
- }\r
- return coor[v];\r
-}\r
- \r
- \r
-int ** minimise_repeat_coor (int **coor, int nseq, Sequence *S)\r
- {\r
- int **new_coor;\r
- int a, min;\r
- new_coor=declare_int ( nseq, 3);\r
- min=return_min_int (coor, nseq, 2);\r
- for ( a=0; a< nseq; a++)\r
- {\r
- new_coor[a][0]=coor[a][0];\r
- new_coor[a][1]=coor[a][1];\r
- new_coor[a][2]=min;\r
- }\r
- return new_coor;\r
- }\r
-int ** get_nol_seq ( Constraint_list *CL, int **coor, int nseq, Sequence *S)\r
- {\r
- int a, s, p, l, nl;\r
- int **buf;\r
- int **new_coor;\r
- \r
- new_coor=declare_int ( nseq+1, 3);\r
- \r
- \r
- buf=get_undefined_list ( CL);\r
- \r
- \r
-\r
- for ( a=0; a< nseq; a++)buf[coor[a][0]][coor[a][1]]=1;\r
-\r
- \r
- for ( a=0; a< nseq; a++)\r
- {\r
- s=coor[a][0];\r
- p=coor[a][1]+1;\r
- l=strlen(S->seq[s]);\r
- nl=0;\r
- while ( p<=l && !buf[s][p++])nl++;\r
- new_coor[a][0]=s;\r
- new_coor[a][1]=coor[a][1];\r
- new_coor[a][2]=nl;\r
- }\r
- free_int ( buf, -1);\r
- return new_coor;\r
- }\r
-\r
-\r
-\r
/* Compares column p1 of pos1 with column p2 of pos2 over nseq rows.
 *
 * Rows where both values are <= 0 are ignored. Returns 0 as soon as a
 * remaining row holds two different values, 1 when at least one row was
 * compared and all compared rows agree, and 0 when no row was compared.
 */
int compare_pos_column( int **pos1,int p1, int **pos2,int p2, int nseq)
{
  int row;
  int compared=0;

  for (row=0; row<nseq; row++)
    {
      int left=pos1[row][p1];
      int right=pos2[row][p2];

      if (left<=0 && right<=0)continue;
      if (left!=right)return 0;
      compared=1;
    }

  return compared;
}
-\r
-char *seq2alphabet (Sequence *S)\r
-{\r
- return array2alphabet (S->seq, S->nseq, "");\r
-}\r
-\r
-char *aln2alphabet (Alignment *A)\r
-{\r
- return array2alphabet (A->seq_al, A->nseq, "");\r
-} \r
-\r
/* Returns a newly allocated, NUL-terminated string listing, in increasing
 * character-code order, every lower-cased character that occurs in the n
 * strings of array and does not occur in forbiden.
 *
 * Characters are converted to unsigned char before being lower-cased and
 * used as a table index: with a signed plain char, bytes above 127 used to
 * produce a negative index into the 256-entry count table.
 */
char *array2alphabet (char **array, int n, char *forbiden)
{
  int a, b;
  int *hasch;
  char *alphabet;

  hasch=vcalloc (256, sizeof (int));
  alphabet=vcalloc ( 257, sizeof (char));

  for ( a=0; a<n; a++)
    {
      const char *p;

      for (p=array[a]; *p; p++)
        {
          hasch[tolower((unsigned char)*p)]++;
        }
    }

  for ( a=0, b=0; a< 256; a++)
    {
      if (hasch[a] && !strrchr(forbiden,a))alphabet[b++]=a;
    }

  alphabet[b]='\0';
  vfree (hasch);
  return alphabet;
}
-\r
-\r
-//***************************************************************\r
-//\r
- // TM PRED\r
-//***************************************************************\r
-\r
-char* alnpos2hmmtop_pred (Alignment *A,Alignment *Pred, int pos, int mode)\r
-{\r
- static char *result;\r
- static Alignment *Cache;\r
- static int *score;\r
- int a, tot, cons;\r
- \r
- if (!score)\r
- {\r
- score=vcalloc (256, sizeof (int));\r
- result=vcalloc (100, sizeof (char));\r
- }\r
-\r
- if (!Pred && !Cache)\r
- {\r
- Cache=aln2hmmtop_pred (A);\r
- }\r
- if (!Pred) Pred=Cache;\r
- \r
-\r
- for (tot=0,a=0; a<A->nseq; a++)\r
- {\r
- char s;\r
- s=Pred->seq_al[a][pos];\r
- if (!is_gap(s))\r
- {\r
- score[tolower(s)]++;\r
- tot++;\r
- }\r
- }\r
-\r
- if ( score['h']>score['i'] && score['h']>score['o'])cons='h';\r
- \r
- else if ( score['i']>score['o'])cons='i';\r
- else cons='o';\r
- if (tot==0) return "";\r
-\r
- \r
- if (mode==VERBOSE)sprintf (result, " H: %3d I: %3d O: %3d P: %c", (score['h']*100)/tot, (score['i']*100)/tot, (score['o']*100)/tot, cons);\r
- else if (mode == SHORT)sprintf ( result, "%c", cons);\r
- score['h']=score['o']=score['i']=0;\r
- return result;\r
-}\r
- \r
- \r
-Alignment * aln2hmmtop_pred (Alignment *A)\r
- {\r
- int a, b, c;\r
- char *buf, *pred;\r
- Alignment *PA;\r
-\r
- PA=copy_aln (A, NULL);\r
- buf=vcalloc ( A->len_aln+1, sizeof (char));\r
- \r
- for ( a=0; a< A->nseq; a++)\r
- {\r
- sprintf (buf, "%s", A->seq_al[a]);\r
- pred=seq2tmstruc (buf);\r
- for (c=0,b=0; b<A->len_aln; b++)\r
- {\r
- if (!is_gap (PA->seq_al[a][b]))PA->seq_al[a][b]=pred[c++];\r
- }\r
- vfree (pred);\r
- }\r
- vfree (buf);\r
- return PA;\r
- }\r
- \r
-char * seq2tmstruc ( char *seq)\r
- {\r
- static Sequence *S;\r
- char *seqfile, *predfile, *buf;\r
- FILE *fp;\r
- \r
- seqfile=vtmpnam (NULL);\r
- predfile=vtmpnam (NULL);\r
- \r
- fp=vfopen (seqfile, "w");\r
- fprintf ( fp, ">seq1\n%s", seq);\r
- vfclose (fp);\r
- \r
- \r
- printf_system ( "fasta_seq2hmmtop_fasta.pl -in=%s -out=%s -arch=%s/%s -psv=%s/%s", seqfile, predfile, get_mcoffee_4_tcoffee(), "hmmtop.arch", get_mcoffee_4_tcoffee(), "hmmtop.psv");\r
- S=get_fasta_sequence (predfile, NULL);\r
- buf=vcalloc ( strlen (S->seq[0])+1, sizeof (char));\r
- sprintf ( buf, "%s", S->seq[0]);\r
- \r
- free_sequence (S, S->nseq);\r
- \r
- return buf;\r
- }\r
- \r
/* Registers the default BLAST settings through set_string_variable() and
 * set_int_variable():
 *   blast_server  $blast_server_4_TCOFFEE, or "EBI"
 *   pdb_db        $pdb_db_4_TCOFFEE, or "pdb"
 *   prot_db       $prot_db_4_TCOFFEE, or "uniprot"
 *   prot_/pdb_ min_sim, max_sim, min_cov, max_cov: 0 / 100
 *
 * Always returns NULL. The function is declared to return char* but used to
 * end with a bare "return;", which is not valid for a non-void function.
 */
char * set_blast_default_values()
{
  char *env;

  env=getenv ("blast_server_4_TCOFFEE");
  set_string_variable ("blast_server", (env)?env:"EBI");

  env=getenv ("pdb_db_4_TCOFFEE");
  set_string_variable ("pdb_db", (env)?env:"pdb");

  env=getenv ("prot_db_4_TCOFFEE");
  set_string_variable ("prot_db", (env)?env:"uniprot");

  set_int_variable ("prot_min_sim", 0);
  set_int_variable ("prot_max_sim", 100);

  set_int_variable ("prot_min_cov", 0);
  set_int_variable ("prot_max_cov", 100);

  set_int_variable ("pdb_min_sim", 0);
  set_int_variable ("pdb_max_sim", 100);
  set_int_variable ("pdb_min_cov", 0);
  set_int_variable ("pdb_max_cov", 100);

  return NULL;
}
- \r
-char * seq2pdb (Sequence *S)\r
-{\r
- set_blast_default_values();\r
- S->nseq=1;\r
- S=seq2template_seq (S, "PDB", NULL);\r
- return seq2P_pdb_id(S,0);\r
-}\r
-\r
-Alignment * seq2blast ( Sequence *S)\r
-{\r
- Alignment *A;\r
- set_blast_default_values();\r
- \r
- if (S->nseq==1)\r
- {\r
- S=seq2template_seq (S, "BLAST", NULL);\r
- A=seq2R_template_profile(S,0);\r
- sprintf ( A->name[0], "%s", S->name[0]);\r
- }\r
- else\r
- {\r
- int a;\r
- for (a=0; a< S->nseq; a++)\r
- {\r
- Sequence *NS;\r
- char name[1000];\r
- NS=fill_sequence_struc(1, &(S->seq[a]), &(S->name[a]));\r
- NS=seq2template_seq (NS, "BLAST", NULL);\r
- A=seq2R_template_profile(NS,0);\r
- sprintf ( name, "%s.prf", S->name[a]);\r
- \r
- output_fasta_aln (name,A); \r
- fprintf (stdout, "\nOUTPUT %s\n", name);\r
- }\r
- exit (EXIT_SUCCESS);\r
- }\r
- return A;\r
-}\r
- \r
- \r
- \r
-\r
-Sequence * seq2unique_name_seq ( Sequence *S)\r
-{\r
- int a;\r
- if ((a=name_list2unique_name_list (S->nseq, S->name)))\r
- {\r
- add_warning ( stderr, "\nWarning: Sequence %s is duplicated in file %s. The sequence will be renamed", S->name[a-1], S->file[a-1]);\r
- }\r
- return S;\r
-}\r
-Alignment * aln2unique_name_aln ( Alignment *S)\r
-{\r
- int a;\r
- if ((a=name_list2unique_name_list (S->nseq, S->name)))\r
- {\r
- add_warning ( stderr, "\nWarning: Sequence %s is duplicated in file %s. The sequence will be renamed", S->name[a-1], S->file[a-1]);\r
- }\r
- return S;\r
-}\r
-\r
- \r
-int name_list2unique_name_list (int n, char **name)\r
-{\r
- int duplicate=0; \r
- int a, b;\r
- \r
- for (a=0; a<n-1; a++)\r
- for (b=a+1; b<n; b++)\r
- {\r
- if ( strm (name[a], name[b]))\r
- {duplicate=a+1;b=a=n;}\r
- }\r
- \r
- if (duplicate)\r
- {\r
- char *tmp1, *tmp2;\r
- Sequence *S;\r
- FILE *fp;\r
- \r
- tmp1=vtmpnam (NULL);\r
- tmp2=vtmpnam (NULL);\r
- fp=vfopen (tmp1, "w");\r
- for (a=0; a< n; a++)fprintf ( fp, ">%s\naggggg\n", name[a]);\r
- vfclose (fp);\r
- printf_system ("fasta_aln2fasta_aln_unique_name.pl %s > %s", tmp1, tmp2);\r
- S=get_fasta_sequence (tmp2, NULL);\r
- for (a=0; a<n; a++)\r
- {\r
- name[a]=vrealloc (name [a], sizeof (int)*(strlen (S->name[a])+1));\r
- sprintf ( name[a], "%s", S->name [a]);\r
- }\r
- free_sequence(S, -1);\r
- }\r
- return duplicate;\r
-}\r
-\r
-Sequence* seq2clean_seq (Sequence *S, char *alp)\r
-{\r
- int a, b, c, d, l;\r
- \r
- for (a=0; a< S->nseq; a++)\r
- {\r
- l=strlen (S->seq[a]);\r
- for (d=0,b=0; b<l; b++)\r
- {\r
- c=S->seq[a][b];\r
- if ( alp==NULL && !strchr (AA_ALPHABET, c) && !strchr (DNA_ALPHABET, c));\r
- else if (alp && strchr (alp, c));\r
- else S->seq[a][d++]=c;\r
- }\r
- S->seq[a][d]='\0';\r
- S->len[a]=strlen (S->seq[a]);\r
- }\r
- return S;\r
-} \r
-int ** seq2aln_pos (Alignment *A, int *ns, int **l_s)\r
- {\r
- int **code;\r
- int a, b,c, d,l, p , g;\r
- \r
-\r
- l=MAX(strlen (A->seq_al[l_s[0][0]]), strlen (A->seq_al[l_s[1][0]]));\r
- code=declare_int ((A->S)->nseq,l+1);\r
- \r
- for (c=0; c<2; c++)\r
- {\r
- l=strlen (A->seq_al[l_s[c][0]]);\r
- for (d=0; d<ns[c]; d++)\r
- {\r
- a=A->order[l_s[c][d]][0];\r
- for (p=0, b=0; b<l; b++)\r
- {\r
- g=is_gap (A->seq_al[l_s[c][d]][b]);\r
- if (!g){p++; code[a][p]=b+1;}\r
- }\r
- }\r
- }\r
- return code;\r
- }\r
-\r
-Alignment *local_maln2global_maln (char *seq, Alignment *A)\r
- {\r
- /*inputs a BLAST alignmnent where the master sequence may be partila\r
- outputs the same alignment, while amkeing sure the profile is perfectly in sink with its master sequence\r
- */\r
-\r
- int a, b, c;\r
- int start, end, rend;\r
- char qname[100], *p;\r
- Alignment *B=NULL;\r
- \r
- sprintf ( qname, "%s", A->name[0]);\r
- p=strtok (qname, "_");\r
- if ( !strm (p, "QUERY"))\r
- {\r
- fprintf ( stderr, "\nUnappropriate format for the alignment [%s:FATAL]", PROGRAM);\r
- myexit (EXIT_FAILURE);\r
- }\r
- \r
- start=atoi(strtok (NULL, "_"));\r
- end=atoi(strtok (NULL, "_"));\r
- rend=strlen (seq);\r
- \r
- B=copy_aln (A,NULL);\r
- if ( start>1 || end<rend )A=realloc_aln (A,rend+1);\r
-\r
- for (a=0; a<start-1; a++)\r
- {\r
- A->seq_al[0][a]=seq[a];\r
- for ( b=1; b< A->nseq; b++)A->seq_al[b][a]='-';\r
- }\r
- \r
- for (c=0,a=start-1; a< end; a++, c++)\r
- {\r
- A->seq_al[0][a]=seq[a];\r
- for ( b=1; b< A->nseq; b++)\r
- {\r
- A->seq_al[b][a]=B->seq_al[b][c];\r
- }\r
- }\r
- for ( a=end; a<rend; a++)\r
- {\r
- A->seq_al[0][a]=seq[a];\r
- for ( b=1; b< A->nseq; b++)A->seq_al[b][a]='-';\r
- }\r
- for ( a=0; a< A->nseq; a++) A->seq_al[a][rend]='\0';\r
- free_aln (B);\r
- \r
- A->len_aln=rend;\r
- return A;\r
- }\r
-\r
-int ** aln2inv_pos ( Alignment *A)\r
-{\r
- int **pos,a;\r
- pos=vcalloc (A->nseq, sizeof (char*));\r
- for (a=0; a< A->nseq; a++)pos[a]=seq2inv_pos (A->seq_al[a]);\r
- return pos;\r
-}\r
/* Returns a newly allocated list where entry r (r counted from 1) is the
 * 1-based column of seq holding its r-th non-gap character. Entry 0 is not
 * written. The list has one more entry than seq has non-gap characters.
 */
int * seq2inv_pos ( char *seq)
{
  int col, len, n_res;
  int *pos;

  len=strlen ( seq);

  n_res=0;
  for (col=0; col<len; col++)
    {
      n_res+=1-is_gap(seq[col]);
    }

  pos=vcalloc (n_res+1, sizeof (int));

  n_res=0;
  for (col=0; col<len; col++)
    {
      if (is_gap(seq[col]))continue;
      n_res++;
      pos[n_res]=col+1;
    }
  return pos;
}
- \r
- \r
-int ** aln2pos_simple_2 (Alignment *A)\r
-  {\r
-    /* Returns a private copy of aln2pos_simple (A, A->nseq).\r
-     * The copy has A->nseq rows, each as long as row 0 of the original table,\r
-     * and belongs to the caller.\r
-     */\r
-    int **pos1;\r
-    int **pos2;\r
-\r
-    pos1=aln2pos_simple (A, A->nseq);\r
-    pos2=duplicate_int (pos1, A->nseq,read_size_int (pos1[0],sizeof (int)));\r
-\r
-    /* aln2pos_simple (NULL, 0) merely returns NULL, so the intermediate table\r
-       has to be released explicitly or it leaks on every call */\r
-    free_int (pos1, -1);\r
-    return pos2;\r
-  }\r
-int ** aln2pos_simple (Alignment *A, int n_nseq, ...)\r
- {\r
- /*\r
- function documentation: start\r
- int ** aln2pos_simple (Alignment *A, int n_nseq, ...)\r
-\r
- Turns A into pos, a matrix in which every column of every processed row holds\r
- the index of the residue found there, counted on the complete sequence.\r
- Counting starts from A->order[x][1] for row x.\r
-\r
-#### with two parameters only: Alignment *A, int n_nseq\r
-\r
- n_nseq must be >0: rows 0..n_nseq-1 of A are processed.\r
-\r
-#### with more than two parameters:\r
- int ** aln2pos_simple (Alignment *A, int n_nseq, int *ns, int **ls)\r
- n_nseq must be <=0 for parameters 3 and 4 to be read\r
-\r
- ns[x]=number of sequences in group x\r
- ls[x]=list of the sequences in group x ( size=ns[x])\r
-\r
- Only groups 0 and 1 are read, and the indices are only computed for the rows\r
- they list.\r
-\r
-#### IMPORTANT\r
- in pos, the numbering of the residues goes from 1 to L:\r
- pos[0][0]=3 means that the first column of the first row of the alignment\r
- contains residue #3 of sequence A->order[0][0];\r
- a gap is stored as -(1+p), p being the index of the last residue met so far;\r
- UNDEFINED_RESIDUE is stored as 0;\r
- when A->seq_cache is set, positive entries are remapped through\r
- A->seq_cache[A->order[row][0]].\r
-\r
- Returns NULL when A is NULL. Otherwise the matrix comes from declare_int and\r
- is not released here.\r
-\r
- function documentation: end\r
- */\r
-\r
- int a, b,c, p, g,l;\r
- int **T;\r
-\r
- int max_nseq;\r
- int n_len=0;\r
-\r
- int *list=NULL;\r
- int *ns=NULL;\r
- int **ls=NULL;\r
-\r
-\r
-\r
- va_list ap;\r
- \r
- \r
- if ( A==NULL)\r
- {\r
- return NULL;\r
- }\r
- else\r
- {\r
- if ( n_nseq>0)\r
- {\r
- list=vcalloc(n_nseq, sizeof (int));\r
- for ( a=0; a< n_nseq; a++)list[a]=a; /* process rows 0..n_nseq-1 */\r
- }\r
- else\r
- { \r
- va_start (ap, n_nseq);\r
- ns=va_arg(ap, int * );\r
- ls=va_arg(ap, int **);\r
- va_end(ap);\r
- list=vcalloc ( ns[0]+ns[1], sizeof (int));\r
- n_nseq=0;\r
- for ( a=0; a< ns[0]; a++)list[n_nseq++]=ls[0][a]; /* rows of group 0 first, */\r
- for ( a=0; a< ns[1]; a++)list[n_nseq++]=ls[1][a]; /* then rows of group 1 */\r
- \r
- }\r
- max_nseq=MAX(read_size_int(A->order,sizeof (int*)),return_max_int (A->order, read_size_int(A->order,sizeof (int*)),0))+1; /* NOTE(review): presumably enough rows for the allocated size of A->order and for its largest sequence index - confirm against read_size_int/return_max_int */\r
- n_len=get_longest_string ( A->seq_al,A->max_n_seq, NULL, NULL)+1; \r
- \r
-\r
- T=declare_int (max_nseq, n_len);\r
- for ( c=0; c< n_nseq; c++)\r
- {\r
- a=list[c]; \r
- l=strlen ( A->seq_al[a]);\r
- \r
- for ( p=A->order[a][1],b=0; b<l; b++)\r
- {\r
- g=1-is_gap(A->seq_al[a][b]); /* 1 on a residue, 0 on a gap (assuming is_gap returns 0/1) */\r
- p+=g;\r
- T[a][b]=(g==1)?p:-(1+p); /* residue: its index; gap: negative code built from the last index */\r
- if ( A->seq_al[a][b]==UNDEFINED_RESIDUE)T[a][b]=0;\r
- if ( A->seq_cache && T[a][b]>0)T[a][b]=A->seq_cache[A->order[a][0]][T[a][b]];\r
- } \r
- }\r
- vfree (list);\r
- }\r
- \r
- return T;\r
- }\r
-Alignment ** split_seq_in_aln_list ( Alignment **aln, Sequence *S, int n_seq, char **seq_list)\r
-  {\r
-    /* Re-expresses the order[][] fields of a list of alignments that were computed on\r
-     * the '*'-separated concatenation of the sequences of S named in seq_list.\r
-     *\r
-     * aln     : list of alignments; the number of alignments is read from\r
-     *           aln[-1]->nseq. Returned unchanged when NULL.\r
-     * S       : sequence set.\r
-     * n_seq   : number of names in seq_list.\r
-     * seq_list: names of the sequences of S that were concatenated.\r
-     *\r
-     * For every row, order[b][1] (a position on the concatenation) is looked up in\r
-     * a translation table and replaced by (sequence index, position in that\r
-     * sequence); the row is renamed "<name>_<aln>_<row>".\r
-     * When no sequence of S is named in seq_list nothing can be translated and\r
-     * aln is returned untouched.\r
-     *\r
-     * NOTE(review): the table entry of the last residue of each sequence is\r
-     * written twice (inside and right after the while loop); this is kept as in\r
-     * the original - confirm it is intended.\r
-     */\r
-    int a, b, c;\r
-    char * long_seq=NULL;\r
-    int len,l;\r
-    int **translation;\r
-    int **table;\r
-\r
-    if ( aln==NULL)return NULL;\r
-    translation=declare_int ( S->nseq,2);\r
-\r
-    for (len=0,a=0; a< S->nseq; a++)\r
-      {\r
-        if((b=name_is_in_list (S->name[a],seq_list, n_seq, 100))!=-1)\r
-          {\r
-            l=strlen(S->seq[a])+1;\r
-            long_seq=vrealloc(long_seq,(len+l+1)*sizeof(char));\r
-            /* make sure strcat starts from a terminated string, also on the\r
-               very first (fresh) buffer */\r
-            long_seq[len]='\0';\r
-            long_seq=strcat(long_seq, S->seq[a]);\r
-            long_seq=strcat(long_seq, "*");\r
-\r
-            translation[a][0]=b;\r
-            translation[a][1]=len;\r
-            len+=l;\r
-          }\r
-        else translation[a][0]=-1;\r
-      }\r
-\r
-    /* nothing matched: long_seq was never allocated */\r
-    if ( !long_seq)\r
-      {\r
-        free_int (translation, -1);\r
-        return aln;\r
-      }\r
-\r
-    /* drop the trailing separator */\r
-    long_seq[len-1]='\0';\r
-    len--;\r
-\r
-    table=declare_int ( len+1, 2);\r
-\r
-    for ( b=0,a=0; a< S->nseq; a++)\r
-      {\r
-        if ( translation[a][0]!=-1)\r
-          {\r
-            c=1;\r
-            while (long_seq[b]!='\0' && long_seq[b]!='*')\r
-              {\r
-                table[b+1][1]=c++;\r
-                table[b+1][0]=translation[a][0];\r
-                b++;\r
-              }\r
-            table[b][1]=c++;\r
-            table[b][0]=translation[a][0];\r
-            b++;\r
-          }\r
-      }\r
-\r
-    for ( a=0; a< (aln[-1])->nseq; a++)\r
-      {\r
-        for ( b=0; b< (aln[a])->nseq; b++)\r
-          {\r
-            /* order[b][0] must be rewritten first: it reads the old order[b][1] */\r
-            (aln[a])->order[b][0]=table[(aln[a])->order[b][1]][0];\r
-            (aln[a])->order[b][1]=table[(aln[a])->order[b][1]][1];\r
-            sprintf ( (aln[a])->name[b],"%s_%d_%d", S->name[(aln[a])->order[b][0]],a+1,b+1);\r
-          }\r
-      }\r
-    vfree (long_seq);\r
-    free_int (translation, -1);\r
-    free_int (table, -1);\r
-    return aln;\r
-  }\r
-\r
-\r
-\r
-Sequence * fill_sequence_struc ( int nseq, char **sequences, char **seq_name)\r
-  {\r
-    /* Builds a Sequence from nseq names and, optionally, nseq sequences.\r
-     * sequences may be NULL: every sequence is then declared with length 1.\r
-     * With sequences given, nseq<1 yields NULL.\r
-     * The sequences are copied, stripped of their gaps, and len[] is set.\r
-     */\r
-    int i;\r
-    Sequence *S;\r
-    int min_l=0, max_l=0;\r
-\r
-    if (sequences)\r
-      {\r
-        if ( nseq<1)return NULL;\r
-\r
-        if ( nseq==1)\r
-          {\r
-            min_l=max_l=strlen (sequences[0]);\r
-          }\r
-        else\r
-          {\r
-            min_l=get_shortest_string( sequences, nseq, NULL, NULL);\r
-            max_l=get_longest_string (sequences, nseq, NULL, NULL);\r
-          }\r
-      }\r
-\r
-    S=declare_sequence (min_l, max_l,nseq);\r
-    S->nseq=nseq;\r
-\r
-    if (!sequences)S->seq=declare_char (S->nseq, 1);\r
-    else S->seq=copy_char ( sequences, S->seq, nseq, -1);\r
-\r
-    S->name=copy_char ( seq_name, S->name,nseq, -1);\r
-\r
-    ungap_array (S->seq,nseq);\r
-    i=0;\r
-    while (i<S->nseq)\r
-      {\r
-        S->len[i]=strlen(S->seq[i]);\r
-        i++;\r
-      }\r
-    return S;\r
-  }\r
-\r
-\r
-Alignment * thread_profile_files2aln (Alignment *A, char *template_file, Fname *F)\r
-{\r
-  /* Marks every profile returned by seq2R_template_profile for the sequences of A\r
-   * as expandable, renames its first row after the matching row of A, and\r
-   * returns expand_aln (A).\r
-   * A->S is built from A when missing; template_file, when given, is applied to\r
-   * A->S through seq2template_seq first.\r
-   */\r
-  Alignment *profile;\r
-  int s;\r
-\r
-  if (A->S==NULL)A->S=aln2seq (A);\r
-  if (template_file!=NULL)A->S=seq2template_seq (A->S, template_file,F);\r
-\r
-  s=0;\r
-  while (s<A->nseq)\r
-    {\r
-      profile=seq2R_template_profile (A->S, s);\r
-      if (profile!=NULL)\r
-        {\r
-          profile->expand=1;\r
-          sprintf ( profile->name[0], "%s", A->name[s]);\r
-        }\r
-      s++;\r
-    }\r
-\r
-  return expand_aln (A);\r
-}\r
- \r
- \r
-\r
-\r
-Alignment * expand_aln (Alignment *A)\r
- {\r
- /* Expands the profiles within an alignment.\r
-    Every row of A whose profile (seq2R_template_profile on A->S) has its expand\r
-    flag set is replaced by the rows of that profile, re-gapped to follow the\r
-    row of A; the other rows are kept as they are.\r
-    Returns A itself when A is NULL or when no row is expanded. Otherwise A is\r
-    freed and a new alignment is returned.\r
- */\r
- \r
- \r
- int a, b, d, e;\r
- Alignment *MAIN=NULL, *SUB=NULL;\r
- int n_sub_seq=0;\r
- int new_nseq=0;\r
- int *list;\r
- Alignment *Profile;\r
- \r
- if ( !A)return A;\r
-\r
-\r
- \r
- list=vcalloc (A->nseq, sizeof (int)); \r
- for ( a=0; a< A->nseq; a++) /* first pass: list the rows that are NOT expanded */\r
- {\r
- Profile=seq2R_template_profile (A->S, A->order[a][0]);\r
- if (Profile && Profile->expand)\r
- {\r
- new_nseq+=Profile->nseq;\r
- }\r
- else \r
- {\r
- new_nseq++; /* new_nseq is accumulated but not read anywhere else in this function */\r
- list[n_sub_seq++]=a;\r
- } \r
- }\r
- \r
- if ( n_sub_seq==A->nseq){vfree(list);return A;} /* nothing to expand */\r
- else if (n_sub_seq==0){MAIN=copy_aln (A, MAIN);MAIN->nseq=0;} /* every row is expanded: start from an empty copy */\r
- else\r
- {\r
- MAIN=extract_sub_aln (A, n_sub_seq, list);\r
- }\r
- vfree(list);\r
- \r
- \r
- for ( a=0; a< A->nseq; a++) /* second pass: thread each profile onto its row and stack it */\r
- {\r
- Profile=seq2R_template_profile (A->S, A->order[a][0]);\r
- if ( Profile && Profile->expand)\r
- {\r
- SUB=copy_aln (Profile,SUB);\r
- SUB=realloc_aln2(SUB, SUB->nseq, A->len_aln+1);\r
- \r
- for ( e=0,b=0; b< A->len_aln; b++) /* e: current column of the profile, b: current column of A */\r
- {\r
- if ( is_gap(A->seq_al[a][b]))\r
- {for (d=0; d< SUB->nseq; d++)SUB->seq_al[d][b]='-';}\r
- else \r
- {\r
- for(d=0; d<SUB->nseq; d++)SUB->seq_al[d][b]=Profile->seq_al[d][e];\r
- e++;\r
- }\r
- \r
- }\r
- MAIN=stack_aln(MAIN, SUB);\r
- }\r
- }\r
- free_aln (A);\r
- free_aln (SUB);\r
- return MAIN;\r
- }\r
-Alignment * expand_number_aln (Alignment *A,Alignment *EA)\r
- {\r
- /* Expands the profiles within EA, using A as the residue/gap template.\r
-    Rows of EA whose profile (seq2R_template_profile on EA->S) has its expand flag\r
-    set are replaced by one row per profile sequence: a column takes the value\r
-    of EA where both A and the profile hold a residue, NO_COLOR_RESIDUE otherwise.\r
-    Returns EA untouched when EA or A is NULL or when nothing is expanded;\r
-    otherwise EA is freed and a new alignment is returned.\r
-    Fatal when EA has fewer rows than A.\r
-    NOTE(review): row EA->nseq (one past the regular rows) is extracted into C and\r
-    stacked last, then MAIN->nseq is decremented - presumably an extra\r
-    consensus/score row; confirm against the callers.\r
- */\r
- \r
- \r
- int a, b, d, e;\r
- Alignment *MAIN=NULL, *SUB=NULL, *C=NULL;\r
- int n_sub_seq=0;\r
- int new_nseq=0;\r
- int *list;\r
- Alignment *Profile;\r
-\r
- if ( !EA || !A)return EA;\r
-\r
- if ( EA->nseq<A->nseq)\r
- {\r
- fprintf (stderr, "\n[ERROR:expand_number_aln] Using as a master an expanded aln (%d %d) [FATAL:%s]", EA->nseq, A->nseq,PROGRAM);\r
- EA->A=A->A=NULL;\r
- print_aln (EA);\r
- print_aln (A);\r
- myexit (EXIT_FAILURE);\r
- }\r
- \r
-\r
- list=vcalloc (EA->nseq, sizeof (int)); \r
- for ( a=0; a< EA->nseq; a++) /* first pass: list the rows that are NOT expanded */\r
- {\r
- Profile=seq2R_template_profile (EA->S, EA->order[a][0]);\r
- if (Profile && Profile->expand)new_nseq+=Profile->nseq;\r
- else \r
- {\r
- new_nseq++; /* new_nseq is accumulated but not read anywhere else in this function */\r
- list[n_sub_seq++]=a;\r
- } \r
- }\r
- \r
- if ( n_sub_seq==EA->nseq){vfree(list);return EA;}\r
- else if (n_sub_seq==0){MAIN=copy_aln (EA, MAIN);MAIN->nseq=0;}\r
- else\r
- {\r
- MAIN=extract_sub_aln (EA, n_sub_seq, list); \r
- }\r
- \r
- \r
- list[0]=EA->nseq; /* list is reused to pull out the single row at index EA->nseq */\r
- C=extract_sub_aln (EA,1, list); \r
- vfree(list);\r
- \r
- \r
- \r
- for ( a=0; a< EA->nseq; a++)\r
- {\r
- Profile=seq2R_template_profile (EA->S, EA->order[a][0]);\r
- if ( Profile && Profile->expand)\r
- {\r
- SUB=copy_aln (Profile,SUB);\r
- SUB=realloc_aln2(SUB, SUB->nseq, EA->len_aln+1);\r
- \r
- for ( e=0,b=0; b<= EA->len_aln; b++) /* NOTE(review): <= also visits column len_aln, presumably to carry the terminator over - confirm */\r
- {\r
- if (is_gap(A->seq_al[a][b]))\r
- {\r
- for ( d=0; d<SUB->nseq; d++)\r
- SUB->seq_al[d][b]=NO_COLOR_RESIDUE;\r
- }\r
- else\r
- {\r
- for ( d=0; d<SUB->nseq; d++)\r
- {\r
-\r
- if ( is_gap (Profile->seq_al[d][e]))\r
- {\r
- SUB->seq_al[d][b]=NO_COLOR_RESIDUE;\r
- }\r
- else SUB->seq_al[d][b]=EA->seq_al[a][b];\r
- }\r
- e++;\r
- }\r
- }\r
- for (d=0; d< SUB->nseq; d++)SUB->score_seq[d]=EA->score_seq[a]; /* every expanded row inherits the score of its source row */\r
- \r
- MAIN=stack_aln(MAIN, SUB);\r
- }\r
- }\r
- \r
- MAIN=stack_aln(MAIN, C);\r
- MAIN->nseq--;\r
- MAIN->score=MAIN->score_aln=EA->score_aln;\r
- \r
- free_aln (SUB);\r
- free_aln (EA);\r
- \r
- free_aln (C);\r
- \r
- return MAIN;\r
- }\r
-\r
-Alignment * probabilistic_rm_aa ( Alignment *A, int pos, int len)\r
-{\r
-  /* Masks a window of residues around column pos in every row of A, then ungaps\r
-   * the alignment and rebuilds A->S from it.\r
-   *\r
-   * pos: centre column; 0 requests a column drawn with rand().\r
-   * len: half width of the window; a negative value means that, for each row,\r
-   *      both half widths are drawn with rand() in [0,-len).\r
-   *\r
-   * Inside the window column pos is written as '~' and the others as '*'.\r
-   * Rows whose window does not fit in the alignment only trigger a warning.\r
-   */\r
-  int draw_width;\r
-  int s, col;\r
-  int left, right;\r
-\r
-  draw_width=(len<0)?1:0;\r
-  if (draw_width)len=-len;\r
-\r
-  vsrand(0);\r
-\r
-  if (pos==0)pos= (rand()%(A->len_aln-(2*len+len))) +len;\r
-\r
-  for ( s=0; s< A->nseq; s++)\r
-    {\r
-      /* left is drawn before right, as many draws as in the original */\r
-      left =(draw_width)?rand()%len:len;\r
-      right=(draw_width)?rand()%len:len;\r
-\r
-      if ( (pos-right)>=0 && (pos+left)<=A->len_aln)\r
-        {\r
-          for ( col=pos-right; col<pos+left; col++)\r
-            {\r
-              if (col==pos)A->seq_al[s][col]='~';\r
-              else A->seq_al[s][col]='*';\r
-            }\r
-        }\r
-      else\r
-        {\r
-          add_warning ( stderr, "\nWarning: probabilistic_rm_aa, pos out of range [%s]\n", PROGRAM);\r
-        }\r
-    }\r
-\r
-  ungap_aln (A);\r
-  free_sequence ( A->S, A->nseq);\r
-  A->S=aln2seq (A);\r
-  return A;\r
-}\r
- \r
-Alignment * remove_gap_column ( Alignment *A, char *mode)\r
- { /* Blanks out, in every row of A, the columns where at least one selected row holds a gap.\r
-      mode: ':' separated selection; "#<n>" selects row n (1-based), any other token\r
-      is looked up as a sequence name; unknown names are skipped. An empty\r
-      selection means all the rows. mode is cut in place by strtok.\r
-      Despite the name no column is dropped: both branches below advance cl, so a\r
-      rejected column is overwritten with '-' rather than removed.\r
-      NOTE(review): the column loop runs up to and including len_aln, so len_aln\r
-      comes out one larger than it went in unless is_gap treats the terminator as a\r
-      gap - confirm this is intended.\r
-      NOTE(review): seq_list holds A->nseq entries; more tokens than rows, or an\r
-      out-of-range "#<n>", are not checked. */\r
- int a, b;\r
- char *p;\r
- int *seq_list;\r
- int nseq=0;\r
- int keep_col, cl;\r
-\r
-\r
- seq_list =vcalloc ( A->nseq, sizeof (int));\r
- while ( (p=strtok(mode, ":")))\r
- {\r
- mode=NULL; /* following strtok calls continue on the same string */\r
- if (p[0]=='#')\r
- {\r
- seq_list[nseq++]=atoi(p+1)-1;\r
- }\r
- else if ( (a=name_is_in_list (p, A->name, A->nseq, 100))!=-1)\r
- {\r
- seq_list[nseq++]=a;\r
- }\r
- }\r
- \r
- if ( nseq==0)\r
- {\r
- for ( a=0; a< A->nseq; a++)seq_list[a]=a;\r
- nseq=A->nseq;\r
- }\r
-\r
- for ( cl=0,a=0; a<=A->len_aln; a++)\r
- {\r
- for (keep_col=1, b=0; b< nseq && keep_col; b++) /* stops at the first selected row holding a gap */\r
- {\r
- keep_col=(is_gap(A->seq_al[seq_list[b]][a]))?0:keep_col;\r
- }\r
- \r
- if ( keep_col)\r
- {\r
- for ( b=0; b< A->nseq; b++)\r
- {\r
- A->seq_al[b][cl]=A->seq_al[b][a];\r
- }\r
- cl++;\r
- }\r
- else\r
- {\r
- for ( b=0; b< A->nseq; b++)\r
- {\r
- A->seq_al[b][cl]='-';\r
- }\r
- cl++;\r
- }\r
- }\r
- A->len_aln=cl;\r
- vfree (seq_list);\r
- \r
- return A;\r
- }\r
-\r
-\r
-Alignment * ungap_sub_aln (Alignment *A, int ns, int *ls)\r
-  {\r
-    /* Removes, from the ns rows of A listed in ls, the columns in which all of\r
-     * these rows hold a gap. Only the listed rows are rewritten and terminated;\r
-     * A->len_aln is left as it was. The length scanned is that of row ls[0].\r
-     */\r
-    int col, kept, k, n_gap;\r
-    int n_col;\r
-\r
-    n_col=strlen ( A->seq_al[ls[0]]);\r
-    kept=0;\r
-\r
-    for ( col=0; col<n_col; col++)\r
-      {\r
-        n_gap=0;\r
-        for ( k=0; k<ns; k++)n_gap+=is_gap(A->seq_al[ls[k]][col]);\r
-\r
-        if (n_gap!=ns)\r
-          {\r
-            /* at least one listed row has a residue here: keep the column */\r
-            for ( k=0; k<ns; k++)\r
-              A->seq_al[ls[k]][kept]=A->seq_al[ls[k]][col];\r
-            kept++;\r
-          }\r
-      }\r
-\r
-    k=0;\r
-    while (k<ns)\r
-      {\r
-        A->seq_al[ls[k]][kept]='\0';\r
-        k++;\r
-      }\r
-    return A;\r
-  }\r
-\r
-Sequence * ungap_seq ( Sequence *S)\r
-  {\r
-    /* Strips the gaps from every sequence of S in place and refreshes len[],\r
-     * min_len and max_len accordingly.\r
-     * Returns NULL when S is NULL; an empty set is returned untouched.\r
-     */\r
-    int a;\r
-\r
-    if ( !S)return NULL;\r
-    if ( S->nseq<=0)return S; /* no seq[0] to seed the extrema with */\r
-\r
-    ungap(S->seq[0]);\r
-    S->max_len=S->min_len=strlen (S->seq[0]);\r
-    for ( a=0; a< S->nseq; a++)\r
-      {\r
-        ungap(S->seq[a]);\r
-        S->len[a]=strlen (S->seq[a]);\r
-        if ( S->len[a]>S->max_len)S->max_len=S->len[a];\r
-        /* the shortest length: the original took MAX here as well, which\r
-           made min_len equal to max_len */\r
-        if ( S->len[a]<S->min_len)S->min_len=S->len[a];\r
-      }\r
-    return S;\r
-  }\r
-Alignment * unalign_aln (Alignment *A, Alignment *C, int t) /* Unaligns the stretches of A whose value in C is a digit <=t.\r
-   A: alignment to modify; C: alignment of the same layout whose characters are\r
-   read as per-residue digits; t: threshold on these digits.\r
-   Step 1: A is upper-cased, except the positions flagged in C, which are\r
-   lower-cased. Step 2: every run of flagged positions of a row of C is given\r
-   columns of its own through unalign_aln_pos. Step 3: the sequences of A are\r
-   threaded onto the new layout of C, copied back into A, and all-gap columns\r
-   are removed. C is modified as well. Returns A. */\r
-{\r
- int a, b, pos, len;\r
- Sequence *S;\r
- \r
- for (a=0; a<A->nseq; a++)\r
- for (b=0; b<A->len_aln; b++)\r
- {\r
- int res=C->seq_al[a][b];\r
- A->seq_al[a][b]=toupper(A->seq_al[a][b]);\r
- if ((isdigit (res) && (res-'0')<=t))\r
- A->seq_al[a][b]=tolower(A->seq_al[a][b]);\r
- }\r
- \r
- \r
- for (pos=-1, a=0; a<C->nseq; a++) /* pos: start of the current flagged run, -1 when outside a run */\r
- {\r
- b=0;\r
- while ( C->seq_al[a][b])\r
- {\r
- int res=C->seq_al[a][b];\r
- if ((isdigit (res) && (res-'0')<=t))\r
- {\r
- if (pos==-1){pos=b;len=1;}\r
- else len++;\r
- }\r
- else if (pos!=-1)\r
- {\r
- \r
- C=unalign_aln_pos(C,a,pos, len); /* the run just ended: move it to columns of its own */\r
- pos=-1;\r
- }\r
- b++;\r
- }\r
- if ( pos!=-1){C=unalign_aln_pos(C,a,pos, len);pos=-1;} /* run reaching the end of the row */\r
- }\r
- S=aln2seq (A);\r
- thread_seq_struc2aln (C, S);\r
- A=realloc_aln2 (A, A->nseq, C->len_aln+1);\r
- A->len_aln=C->len_aln;\r
- for (a=0; a<A->nseq; a++)sprintf ( A->seq_al[a], "%s", C->seq_al[a]);\r
- ungap_aln (A);\r
- \r
- free_sequence (S, -1);\r
- return A;\r
-}\r
-Alignment * unalign_aln_pos (Alignment *A, int s, int p, int l)\r
-{\r
-  /* Gives the l characters of row s starting at column p columns of their own:\r
-   * the stretch is saved and blanked with '-', insert_gap_col (A, p, l) is\r
-   * called, and the saved characters are written back at columns p..p+l-1.\r
-   * Returns the (possibly reallocated) alignment.\r
-   */\r
-  int k;\r
-  char *saved;\r
-\r
-  saved=vcalloc (l+1, sizeof (char));\r
-  k=0;\r
-  while (k<l)\r
-    {\r
-      saved[k]=A->seq_al[s][p+k];\r
-      A->seq_al[s][p+k]='-';\r
-      k++;\r
-    }\r
-\r
-  A=insert_gap_col (A,p, l);\r
-\r
-  k=0;\r
-  while (k<l)\r
-    {\r
-      A->seq_al[s][p+k]=saved[k];\r
-      k++;\r
-    }\r
-  vfree (saved);\r
-  return A;\r
-}\r
-Alignment * insert_gap_col (Alignment *A, int p, int l)\r
-{\r
-  /* Inserts, in every row of A, the l-character string built by generate_null\r
-   * right before column p, and extends len_aln by l.\r
-   * A is returned untouched when it is NULL, when p is not inside\r
-   * ]0, len_aln[, or when there is nothing to insert (l<=0).\r
-   */\r
-  int a, c;\r
-  char *buf;\r
-  char *gap;\r
-\r
-  if ( !A || p>=A->len_aln || p<=0 || l<=0)return A;\r
-\r
-  /* allocated only once the arguments are accepted, so that the early return\r
-     above cannot leak it */\r
-  gap=generate_null(l);\r
-  buf=vcalloc (A->len_aln+l+1, sizeof (char));\r
-  A=realloc_aln2(A,A->nseq, A->len_aln+l+1);\r
-  for (a=0; a<A->nseq; a++)\r
-    {\r
-      /* cut the row at p, then rebuild it as: left part + gap + column p + right part */\r
-      c=A->seq_al[a][p];\r
-      A->seq_al[a][p]='\0';\r
-      sprintf ( buf, "%s%s%c%s", A->seq_al[a],gap,c,A->seq_al[a]+p+1);\r
-      sprintf (A->seq_al[a], "%s", buf);\r
-    }\r
-  vfree (buf);\r
-  vfree (gap);\r
-  A->len_aln+=l;\r
-  return A;\r
-}\r
-Alignment * unalign_residues (Alignment *A, int si1, int si2)\r
-{\r
-  /* Rewrites rows si1 and si2 of A so that pairs of aligned non-uppercase,\r
-   * non-gap characters no longer face each other: such a column is split in\r
-   * two, each row facing a '-' in turn. Any other column is copied, '.'\r
-   * being turned into '-'.\r
-   * The two rows are replaced by freshly allocated buffers and len_aln is set to\r
-   * their new length; the other rows are not touched.\r
-   */\r
-  char *row1, *row2, *out1, *out2;\r
-  int n_col, col, w, c1, c2;\r
-\r
-  row1=A->seq_al[si1];\r
-  row2=A->seq_al[si2];\r
-  n_col=strlen (row1);\r
-\r
-  /* worst case: every column is split in two */\r
-  out1=vcalloc (2*n_col+1, sizeof (char));\r
-  out2=vcalloc (2*n_col+1, sizeof (char));\r
-\r
-  w=0;\r
-  for (col=0; col< n_col; col++)\r
-    {\r
-      c1=row1[col];\r
-      c2=row2[col];\r
-      if (!(is_gap(c1) || is_gap(c2) || isupper (c1) || isupper(c2)))\r
-        {\r
-          out1[w]=c1;\r
-          out2[w]='-';\r
-          w++;\r
-          out2[w]=c2;\r
-          out1[w]='-';\r
-          w++;\r
-          continue;\r
-        }\r
-      if (c1=='.')out1[w]='-';\r
-      else out1[w]=c1;\r
-      if (c2=='.')out2[w]='-';\r
-      else out2[w]=c2;\r
-      w++;\r
-    }\r
-  out1[w]='\0';\r
-  out2[w]='\0';\r
-  A->seq_al[si1]=out1;\r
-  A->seq_al[si2]=out2;\r
-\r
-  A->len_aln=strlen (out1);\r
-  return A;\r
-}\r
-Alignment *degap_aln (Alignment *A)\r
-{\r
-  /* Applies ungap to every row of A in place; len_aln is not updated. */\r
-  int row=0;\r
-\r
-  while (row<A->nseq)\r
-    {\r
-      ungap (A->seq_al[row]);\r
-      row++;\r
-    }\r
-  return A;\r
-}\r
-\r
-Alignment *ungap_aln_n ( Alignment *A, int p)\r
- {\r
-/* Removes columns of A according to their percentage of gaps, in place.\r
-   p>0: a column is removed when its gap percentage is >=p; p==100 removes the\r
-   gap-only columns, p==1 removes every column holding at least one gap.\r
-   p<0: see the second test below. p==0 removes nothing.\r
-   Rows are re-terminated and len_aln is updated. A is returned as is when it\r
-   has no row.\r
-   NOTE(review): gp is never negative, so with p<0 the test gp<=p cannot\r
-   succeed; only p==-100 (columns without any gap) and p==-1 (columns with at\r
-   least one gap) remove anything - confirm whether gp<=-p was meant. */ \r
- int a, b, c;\r
- int t;\r
- int gp;\r
-\r
- if ( A->nseq==0)return A;\r
-\r
- for ( c=0,a=0; a< A->len_aln; a++) /* a: column read, c: column written */\r
- {\r
- for ( t=0,b=0; b<A->nseq; b++)\r
- t+=is_gap(A->seq_al[b][a]);\r
- gp=(t*100)/A->nseq; /* integer percentage of gaps in column a */\r
- if (p>0 && (gp>=p || (t==A->nseq && p==100) || (t && p==1)));//Remove columns containing more than p% gaps\r
- else if (p<0 && (gp<=p || (t==0 && p==-100) ||(t && p==-1)));//remove columns containing less than p% gaps\r
- else\r
- {\r
- for ( b=0; b<A->nseq; b++)\r
- A->seq_al[b][c]=A->seq_al[b][a];\r
- c++;\r
- }\r
- }\r
- for ( b=0; b<A->nseq; b++)A->seq_al[b][c]='\0';\r
- A->len_aln=c; \r
- return A;\r
- }\r
-\r
-Alignment *ungap_aln ( Alignment *A)\r
-{\r
-  /* Removes the columns made of gaps only: a threshold of 100 is the\r
-   * gap-only case of ungap_aln_n. */\r
-  int gap_only_threshold=100;\r
-\r
-  return ungap_aln_n (A, gap_only_threshold);\r
-}\r
-/*\r
-Alignment *ungap_aln ( Alignment *A)\r
- {\r
- int a, b, c,t;\r
- \r
- for ( c=0,a=0; a< A->len_aln; a++)\r
- {\r
- for ( t=0,b=0; b<A->nseq; b++)\r
- t+=is_gap(A->seq_al[b][a]);\r
- if (t==A->nseq);\r
- else\r
- {\r
- for ( b=0; b<A->nseq; b++)\r
- A->seq_al[b][c]=A->seq_al[b][a];\r
- c++;\r
- }\r
- }\r
- for ( b=0; b<A->nseq; b++)A->seq_al[b][c]='\0';\r
- A->len_aln=c; \r
- return A;\r
-\r
- }\r
-*/\r
-\r
-\r
-Alignment *remove_end (Alignment *A)\r
-  {\r
-    /* Trims both ends of A down to the first and the last column holding more\r
-     * than one residue, and returns extract_aln on that range.\r
-     */\r
-    int col, row, n_res;\r
-    int left, right;\r
-\r
-    /* leftmost column with at least two residues (len_aln when there is none) */\r
-    col=0;\r
-    while (col<A->len_aln)\r
-      {\r
-        n_res=0;\r
-        for ( row=0; row< A->nseq; row++)\r
-          n_res+=(is_gap(A->seq_al[row][col]))?0:1;\r
-        if ( n_res>1)break;\r
-        col++;\r
-      }\r
-    left=col;\r
-\r
-    /* rightmost such column; the scan stops at column 0 without testing it */\r
-    col=A->len_aln-1;\r
-    while (col>0)\r
-      {\r
-        n_res=0;\r
-        for ( row=0; row< A->nseq; row++)\r
-          n_res+=(is_gap(A->seq_al[row][col]))?0:1;\r
-        if ( n_res>1)break;\r
-        col--;\r
-      }\r
-    right=col;\r
-\r
-    return extract_aln(A, left, right+1);\r
-  }\r
-\r
-Alignment* condense_aln (Alignment *A)\r
-{\r
-  /* Condenses complementary columns:\r
-     X-      X\r
-     -X ....>X\r
-     X-      X\r
-\r
-     Two neighbouring columns are complementary when every row has a gap in at\r
-     least one of them and the left one is not made of gaps only. The residues\r
-     of the right column are then moved into the left one, and the emptied\r
-     column is removed by ungap_aln.\r
-     Passes are repeated until the length of the alignment stops shrinking: the\r
-     removal has to happen inside the loop, otherwise the length cannot change\r
-     between two tests and only one pass is ever made.\r
-  */\r
-  int a, b, plen, n,m, r1, r2;\r
-\r
-  plen=-1;\r
-  while ( A->len_aln !=plen)\r
-    {\r
-      plen=A->len_aln;\r
-      for ( a=0; a< A->len_aln-1; a++)\r
-        {\r
-          /* n: rows with a gap in a or a+1; m: rows with a gap in a */\r
-          for ( n=m=b=0; b< A->nseq; b++)\r
-            {\r
-              r1=is_gap(A->seq_al[b][a]);\r
-              r2=is_gap(A->seq_al[b][a+1]);\r
-              n+=(r1 || r2);\r
-              m+=r1;\r
-            }\r
-\r
-          if ( n==A->nseq && m!=A->nseq)\r
-            {\r
-              for (b=0; b< A->nseq; b++)\r
-                {\r
-                  if (!is_gap(A->seq_al[b][a+1]))\r
-                    {\r
-                      A->seq_al[b][a]=A->seq_al[b][a+1];\r
-                      A->seq_al[b][a+1]='-';\r
-                    }\r
-                }\r
-              a++; /* column a+1 is now empty: skip it */\r
-            }\r
-        }\r
-      A=ungap_aln(A);\r
-    }\r
-  return A;\r
-}\r
- \r
- \r
- \r
-\r
-void compress_aln ( Alignment *A)\r
-  {\r
-    /* Removes, in place, the columns of A made of '-' only (only the '-'\r
-     * character counts as a gap here), then re-terminates the rows and updates\r
-     * len_aln.\r
-     */\r
-    int col, row, kept, has_residue;\r
-\r
-    kept=0;\r
-    for (col=0; col< A->len_aln; col++)\r
-      {\r
-        has_residue=0;\r
-        row=0;\r
-        while (row< A->nseq && !has_residue)\r
-          {\r
-            if ( A->seq_al[row][col]!='-')has_residue=1;\r
-            row++;\r
-          }\r
-        if ( !has_residue)continue;\r
-\r
-        for (row=0; row< A->nseq; row++)\r
-          A->seq_al[row][kept]=A->seq_al[row][col];\r
-        kept++;\r
-      }\r
-    A->len_aln=kept;\r
-\r
-    row=0;\r
-    while (row< A->nseq)\r
-      {\r
-        A->seq_al[row][kept]='\0';\r
-        row++;\r
-      }\r
-  }\r
-\r
-Alignment *seq_coor2aln ( Sequence *S, Alignment *A, int **coor, int nseq)\r
-  {\r
-    /* Fills A with nseq fragments of the sequences of S.\r
-     * coor[a][0] is the index of the source sequence; coor[a][1]-1 and coor[a][2]\r
-     * are handed to extract_char to cut the fragment.\r
-     * Rows are named "Repeat_<row>_<sequence>"; order[a][0] is set to 0 and\r
-     * order[a][1] to coor[a][1]-1. The file names of S are copied into A.\r
-     */\r
-    int i;\r
-    char *fragment;\r
-\r
-    A=realloc_alignment2(A, nseq, return_maxlen ( S->seq, S->nseq)+1);\r
-\r
-    i=0;\r
-    while (i< S->nseq)\r
-      {\r
-        sprintf ( A->file[i], "%s", S->file[i]);\r
-        i++;\r
-      }\r
-\r
-    for ( i=0; i< nseq; i++)\r
-      {\r
-        int *c=coor[i];\r
-\r
-        sprintf (A->name[i], "Repeat_%d_%d", i, c[0]);\r
-        fragment=extract_char ( S->seq[c[0]], c[1]-1, c[2]);\r
-        sprintf ( A->seq_al[i],"%s", fragment);\r
-        vfree(fragment);\r
-        A->order[i][0]=0;\r
-        A->order[i][1]=c[1]-1;\r
-      }\r
-    A->nseq=nseq;\r
-    return A;\r
-  }\r
-\r
-Alignment *strings2aln (int nseq,...)\r
-  {\r
-    /* strings2aln(nseq, <name1>, <seq1>, <name2>, <seq2>....)\r
-     * Builds an alignment from nseq (name, sequence) pairs of C strings given as\r
-     * variadic arguments. The strings are copied; the result is seq2aln (S,NULL,1)\r
-     * on the Sequence made from the copies.\r
-     */\r
-    va_list ap;\r
-    char **seq_in, **seq_copy;\r
-    char **name_in, **name_copy;\r
-    Sequence *S;\r
-    Alignment *A;\r
-    int i, longest, l;\r
-\r
-    seq_in=vcalloc (nseq, sizeof (char*));\r
-    name_in=vcalloc (nseq, sizeof (char*));\r
-\r
-    /* arguments come as name first, sequence second */\r
-    va_start(ap, nseq);\r
-    for ( i=0; i< nseq; i++)\r
-      {\r
-        name_in[i]=va_arg(ap,char*);\r
-        seq_in[i]=va_arg(ap,char*);\r
-      }\r
-    va_end(ap);\r
-\r
-    longest=0;\r
-    for ( i=0; i< nseq; i++)\r
-      {\r
-        l=strlen (seq_in[i]);\r
-        if (l>longest)longest=l;\r
-      }\r
-\r
-    seq_copy=declare_char (nseq, longest+1);\r
-    name_copy=declare_char (nseq, MAXNAMES+1);\r
-    for ( i=0; i< nseq; i++)\r
-      {\r
-        sprintf ( seq_copy[i], "%s", seq_in[i]);\r
-        sprintf ( name_copy[i], "%s", name_in[i]);\r
-      }\r
-\r
-    S=fill_sequence_struc(nseq,seq_copy,name_copy);\r
-\r
-    free_char (seq_copy, -1);\r
-    free_char (name_copy, -1);\r
-    vfree (seq_in);\r
-    vfree (name_in);\r
-\r
-    A=seq2aln(S,NULL, 1);\r
-    return A;\r
-  }\r
-Alignment *seq2aln ( Sequence *S, Alignment *A,int rm_gap)\r
-  {\r
-    /* Turns the sequence set S into the alignment A (reallocated to fit).\r
-     * Names, comments and file names are copied; order[a] is set to (a,0) and\r
-     * len[a] to the ungapped length.\r
-     * rm_gap==0 or NO_PAD: rows keep the gaps found in S; any other value: rows\r
-     * are ungapped. Unless rm_gap==NO_PAD the rows are finally padded to a\r
-     * common length by padd_aln.\r
-     * A->S is set to S itself (no copy).\r
-     */\r
-    int s;\r
-    int keep_gaps;\r
-\r
-    A=realloc_alignment2(A, S->nseq, S->max_len+1);\r
-    for ( s=0; s< S->nseq; s++)sprintf ( A->file[s], "%s", S->file[s]);\r
-\r
-    A->nseq=S->nseq;\r
-    A->max_len=S->max_len;\r
-    A->min_len=S->min_len;\r
-\r
-    keep_gaps=( rm_gap==0 || rm_gap==NO_PAD);\r
-\r
-    s=0;\r
-    while (s< S->nseq)\r
-      {\r
-        A->order[s][0]=s;\r
-        A->order[s][1]=0;\r
-\r
-        sprintf ( A->seq_comment[s], "%s", S->seq_comment[s]);\r
-        sprintf ( A->aln_comment[s], "%s", S->aln_comment[s]);\r
-        sprintf ( A->name[s], "%s", S->name[s]);\r
-\r
-        /* the ungapped copy gives the length ... */\r
-        sprintf ( A->seq_al[s], "%s", S->seq[s]);\r
-        ungap ( A->seq_al[s]);\r
-        A->len[s]=strlen ( A->seq_al[s]);\r
-\r
-        /* ... and the gapped text is put back when gaps must be kept */\r
-        if ( keep_gaps)sprintf ( A->seq_al[s], "%s", S->seq[s]);\r
-        s++;\r
-      }\r
-\r
-    if (rm_gap!=NO_PAD)padd_aln (A);\r
-    A->S=S;\r
-    return A;\r
-  }\r
-\r
-Alignment *padd_aln ( Alignment *A)\r
-{\r
-  /* Pads the rows of A to a common length with padd_string and sets len_aln\r
-   * to the length of row 0. */\r
-  char **rows;\r
-\r
-  rows=padd_string (A->seq_al, A->nseq, '-');\r
-  A->seq_al=rows;\r
-  A->len_aln=strlen (rows[0]);\r
-  return A;\r
-}\r
-\r
-char **padd_string ( char **string, int n,char pad)\r
-{\r
-  /* Pads the n strings so that they all have the length of the longest one.\r
-   * Each string is extended in place with the filler returned by generate_null,\r
-   * so it must have room for the longest length. The pad argument is not\r
-   * consulted.\r
-   */\r
-  int longest, i, missing;\r
-  char *filler;\r
-\r
-  longest=get_longest_string (string,n, NULL, NULL);\r
-  i=0;\r
-  while (i<n)\r
-    {\r
-      missing=longest-strlen (string[i]);\r
-      filler=generate_null (missing);\r
-      strcat ( string[i], filler);\r
-      vfree (filler);\r
-      i++;\r
-    }\r
-  return string;\r
-}\r
-\r
-Alignment * trim_aln_with_seq ( Alignment *S, Alignment *P)\r
-{\r
-  /* Aligns the profiles of S and P (blosum62mt, gap penalties -8/-1,\r
-   * myers_miller_pair_wise) and returns a copy of the result restricted to the\r
-   * columns where its first row is not a gap.\r
-   * The rows of the result are named "tmpname_<n>", n being a counter kept\r
-   * across calls. The profiles made from S and P are freed; the result carries\r
-   * its own Sequence in R->S.\r
-   */\r
-  Alignment *full, *R;\r
-  int col, row, kept;\r
-  static int seqindex;\r
-\r
-  P=aln2profile (P);\r
-  S=aln2profile (S);\r
-\r
-  full=align_two_aln (S,P, "blosum62mt",-8,-1, "myers_miller_pair_wise");\r
-  row=0;\r
-  while (row<full->nseq)\r
-    {\r
-      sprintf (full->name[row], "tmpname_%d", seqindex++);\r
-      row++;\r
-    }\r
-\r
-  R=copy_aln (full, NULL);\r
-  kept=0;\r
-  for (col=0; col< full->len_aln; col++)\r
-    {\r
-      if ( is_gap (full->seq_al[0][col]))continue;\r
-      for ( row=0; row<full->nseq; row++)\r
-        R->seq_al[row][kept]=full->seq_al[row][col];\r
-      kept++;\r
-    }\r
-  for ( row=0; row< full->nseq; row++)R->seq_al[row][kept]='\0';\r
-  R->len_aln=kept;\r
-  R->S=aln2seq (R);\r
-\r
-  free_aln (S);\r
-  free_aln (P);\r
-  free_aln (full);\r
-\r
-  return R;\r
-}\r
-\r
-Alignment * add_align_seq2aln ( Alignment *A, char *seq, char *seq_name)\r
-  {\r
-    /* Appends the already aligned sequence seq, named seq_name, to A.\r
-     * A==NULL: a new one-row alignment is created, its length being that of seq.\r
-     * Otherwise seq must be exactly len_aln long; a mismatch is fatal.\r
-     * Returns the (possibly reallocated) alignment.\r
-     */\r
-    if ( !A)\r
-      {\r
-        A=declare_aln (NULL);\r
-        A=realloc_aln2 ( A, 1, strlen (seq)+1);\r
-        A->nseq=0;\r
-        sprintf ( A->name[A->nseq], "%s", seq_name);\r
-        sprintf ( A->seq_al[A->nseq], "%s", seq);\r
-        A->nseq++;\r
-        /* without this the length test of the next call compares against a\r
-           length that was never set */\r
-        A->len_aln=strlen (seq);\r
-      }\r
-    else if ( strlen (seq)!=A->len_aln)\r
-      {\r
-        fprintf ( stderr, "\nError: Attempt to stack incompatible aln and aligned sequence[FATAL]\n");\r
-        myexit (EXIT_FAILURE);\r
-        A=NULL;\r
-      }\r
-    else\r
-      {\r
-        A=realloc_aln2 ( A, A->nseq+1, A->len_aln+1);\r
-        sprintf ( A->name[A->nseq], "%s", seq_name);\r
-        sprintf ( A->seq_al[A->nseq], "%s", seq);\r
-        A->nseq++;\r
-      }\r
-    return A;\r
-  }\r
- \r
-\r
-Alignment *aln2number (Alignment *A)\r
-  {\r
-    /* Replaces the rows of A by the result of char_array2number. */\r
-    char **converted;\r
-\r
-    converted=char_array2number(A->seq_al, A->nseq);\r
-    A->seq_al=converted;\r
-    return A;\r
-  }\r
-Sequence *seq2number (Sequence *A)\r
-  {\r
-    /* Replaces the sequences of A by the result of char_array2number. */\r
-    char **converted;\r
-\r
-    converted=char_array2number(A->seq, A->nseq);\r
-    A->seq=converted;\r
-    return A;\r
-  }\r
-\r
-Sequence * aln2seq (Alignment *A)\r
-{\r
-  /* Sequence set of A with the gaps removed (RM_GAP mode of aln2seq_main). */\r
-  Sequence *S;\r
-\r
-  S=aln2seq_main(A, RM_GAP);\r
-  return S;\r
-}\r
-Sequence * aln2seq_main (Alignment *A, int mode)\r
-  {\r
-    /* Builds a new Sequence holding a copy of the rows of A, with their names,\r
-     * comments and file names.\r
-     * mode==RM_GAP: the gaps are removed from the copies; any other value: the\r
-     * rows are copied as they are.\r
-     * Returns NULL when A is NULL or has no row. The caller owns the result.\r
-     */\r
-    Sequence *LS;\r
-    int a;\r
-    int maxlen;\r
-\r
-    if ( !A) return NULL;\r
-    else if ( A->nseq==0)return NULL;\r
-    for (maxlen=0,a=0; a<A->nseq; a++)maxlen=MAX(maxlen, strlen (A->seq_al[a]));\r
-\r
-    LS=declare_sequence ( maxlen+1, maxlen+1, A->nseq);\r
-    LS->nseq=A->nseq;\r
-    for ( a=0; a< LS->nseq; a++)\r
-      {\r
-        sprintf (LS->file[a],"%s", A->file[a]);\r
-\r
-        sprintf ( LS->seq[a], "%s", A->seq_al[a]);\r
-\r
-        if (mode==RM_GAP)ungap ( LS->seq[a]);\r
-\r
-        LS->len[a]=strlen ( LS->seq[a]);\r
-\r
-        /* comments are data, never a format: a '%' in them must be copied\r
-           verbatim, not interpreted by sprintf */\r
-        sprintf ( LS->seq_comment[a], "%s", A->seq_comment[a]);\r
-        sprintf ( LS->aln_comment[a], "%s", A->aln_comment[a]);\r
-        sprintf ( LS->name[a], "%s", A->name[a]);\r
-      }\r
-    return LS;\r
-  }\r
-\r
-Sequence *keep_residues_in_seq ( Sequence *S, char *list, char replacement)\r
-{\r
-  /* Applies keep_residues_in_aln (list, replacement) to the sequences of S:\r
-   * S is turned into a temporary alignment, filtered, and every filtered row is\r
-   * written back, ungapped, into S->seq. S->len is not refreshed.\r
-   */\r
-  Alignment *tmp=NULL;\r
-  int s;\r
-\r
-  tmp=seq2aln (S, tmp,1);\r
-  tmp=keep_residues_in_aln ( tmp, list, replacement);\r
-\r
-  s=0;\r
-  while (s< tmp->nseq)\r
-    {\r
-      ungap (tmp->seq_al[s]);\r
-      sprintf ( S->seq[s], "%s", tmp->seq_al[s]);\r
-      s++;\r
-    }\r
-  free_aln (tmp);\r
-  return S;\r
-}\r
-\r
-\r
-Alignment *aln2short_aln ( Alignment *A, char *list, char *new, int spacer)\r
-{ /* Shortens every row of A, in place: residues found in list are kept, gaps are\r
-     dropped, and stretches of other residues are summarised with new[0].\r
-     cl counts the residues outside list met since the last character written.\r
-     len_aln is not updated.\r
-     NOTE(review): buf holds strlen(row)+1 characters; with spacer==0 a residue\r
-     outside list followed by one inside list produces three characters for two,\r
-     which looks like it can overrun buf and the row - confirm. */\r
- int a, b, r, cl, l;\r
- char *buf;\r
- \r
- for ( a=0; a< A->nseq; a++)\r
- {\r
- buf=vcalloc ( strlen (A->seq_al[a])+1, sizeof (char));\r
- \r
- for (l=0,cl=0, b=0; b< A->len_aln; b++)\r
- {\r
- r=A->seq_al[a][b];\r
- if ( is_gap(r));\r
- else if ( is_in_set (r, list))\r
- {\r
- if (cl){cl=0; buf[l++]=new[0];} /* close the pending stretch before the kept residue */\r
- buf[l++]=r;\r
- }\r
- else\r
- {\r
- if ( cl==spacer){buf[l++]=new[0];cl=0;} /* one marker every time the count reaches spacer */\r
- cl++;\r
- }\r
- \r
- }\r
- \r
- buf[l]='\0';\r
- sprintf (A->seq_al[a], "%s", buf);\r
- vfree (buf);\r
- }\r
- return A;\r
-}\r
- \r
-Alignment *keep_residues_in_aln ( Alignment *A, char *list, char replacement)\r
-{\r
-  /* filter_keep_residues_in_aln without structure alignment: ST is NULL,\r
-   * use_cons is 0 and the value is -1. */\r
-  Alignment *no_structure=NULL;\r
-\r
-  return filter_keep_residues_in_aln (A,no_structure, 0, -1, list, replacement);\r
-}\r
-Alignment *filter_keep_residues_in_aln ( Alignment *A,Alignment *ST, int use_cons, int value, char *list, char replacement)\r
-{\r
-  /* Builds the symbol list handed to filter_aln_convert: one "cc" entry for\r
-   * every character c of list, followed by a final "#<replacement>" entry.\r
-   * A, ST, use_cons and value are passed through unchanged.\r
-   */\r
-  char **symbols;\r
-  int n_keep, i;\r
-\r
-  n_keep=strlen (list);\r
-  symbols=declare_char (n_keep+1, 256);\r
-\r
-  i=0;\r
-  while (i<n_keep)\r
-    {\r
-      sprintf ( symbols[i], "%c%c", list[i], list[i]);\r
-      i++;\r
-    }\r
-  sprintf ( symbols[n_keep],"#%c", replacement);\r
-\r
-  A=filter_aln_convert (A, ST,use_cons,value, n_keep+1, symbols);\r
-  free_char (symbols, -1);\r
-  return A;\r
-}\r
- \r
-\r
-Alignment *filter_convert_aln ( Alignment *A,Alignment *ST, int use_cons, int value, int n, ...)\r
-{\r
-  /* Variadic front end of filter_aln_convert: the n trailing arguments are the\r
-   * char* symbol strings, gathered into an array for the call. Only the array\r
-   * is released afterwards, the strings stay with the caller.\r
-   */\r
-  va_list ap;\r
-  char **symbols;\r
-  int i;\r
-\r
-  symbols=vcalloc ( n,sizeof(char*));\r
-\r
-  va_start (ap, n);\r
-  i=0;\r
-  while (i<n)\r
-    {\r
-      symbols[i]=va_arg(ap, char * );\r
-      i++;\r
-    }\r
-  va_end(ap);\r
-\r
-  A=filter_aln_convert (A,ST,use_cons,value, n,symbols);\r
-  vfree(symbols);\r
-  return A;\r
-}\r
-\r
-Alignment * filter_aln ( Alignment *A, Alignment *ST, int value)\r
-  {\r
-    /* DELETE mode of filter_aln_convert, without consensus (use_cons is 0). */\r
-    int use_cons=0;\r
-\r
-    return filter_aln_convert (A, ST,use_cons,value,DELETE, NULL);\r
-  }\r
-Alignment * filter_aln_switchcase ( Alignment *A, Alignment *ST,int use_cons, int value)\r
-  {\r
-    /* SWITCHCASE mode of filter_aln_convert.\r
-     * use_cons is forwarded, as filter_aln_upper_lower and\r
-     * filter_aln_lower_upper do; it used to be replaced by 0, which silently\r
-     * ignored the request of the caller. */\r
-    return filter_aln_convert (A, ST,use_cons,value,SWITCHCASE, NULL);\r
-  }\r
-Alignment * filter_aln_upper_lower ( Alignment *A, Alignment *ST,int use_cons, int value)\r
-  {\r
-    /* LOWER mode of filter_aln_convert: the selected positions are lower-cased. */\r
-    Alignment *filtered;\r
-\r
-    filtered=filter_aln_convert (A, ST,use_cons,value, LOWER, NULL);\r
-    return filtered;\r
-  }\r
-Alignment * filter_aln_lower_upper ( Alignment *A, Alignment *ST,int use_cons, int value)\r
-  {\r
-    /* UPPER mode of filter_aln_convert: the selected positions are upper-cased. */\r
-    Alignment *filtered;\r
-\r
-    filtered=filter_aln_convert (A, ST,use_cons,value, UPPER, NULL);\r
-    return filtered;\r
-  }\r
-Alignment * STseq2STaln ( Alignment *A, Alignment *ST)\r
-  {\r
-    /* Makes the structure/score alignment ST follow the layout of A.\r
-     * Nothing is done when ST is NULL or already has the length of A.\r
-     * Otherwise every sequence of A also present in ST must have the same\r
-     * ungapped length in both (fatal if not); ST is then rebuilt as a copy of A\r
-     * onto which the sequences of the former ST are threaded.\r
-     * Returns ST.\r
-     */\r
-    int a, i=0;\r
-\r
-    if (ST && ST->len_aln !=A->len_aln)\r
-      {\r
-        Sequence *S_T, *S_A;\r
-\r
-        S_T=aln2seq (ST);\r
-        S_A=aln2seq (A);\r
-\r
-        for (a=0; a< A->nseq; a++)\r
-          {\r
-            i=name_is_in_list (A->name[a], S_T->name,S_T->nseq, 100);\r
-            if (i!=-1)\r
-              {\r
-                char *s1, *s2;\r
-                s1=(S_T)->seq[i];ungap(s1);\r
-                s2=(S_A)->seq[a];ungap(s2);\r
-\r
-                if ( strlen (s1)!=strlen(s2))\r
-                  {\r
-                    fprintf ( stderr, "%s\n%s\n", s1, s2);\r
-                    printf_exit (EXIT_FAILURE, stderr, "ERROR: Sequence %s has different length in the alignment and in the structure Alignment [FATAL:%s]\n", A->name[a], PROGRAM);\r
-                  }\r
-              }\r
-          }\r
-        ST=copy_aln (A, ST);\r
-        thread_seq_struc2aln (ST,S_T);\r
-\r
-        /* both sets are private copies made above: release them once the\r
-           threading is done, as unalign_aln does */\r
-        free_sequence (S_T, -1);\r
-        free_sequence (S_A, -1);\r
-      }\r
-\r
-    return ST;\r
-  }\r
-Alignment * merge_annotation ( Alignment *A, Alignment *ST, char *seq)\r
-{\r
-  /* Projects the digit annotations of ST onto one row of A.\r
-   * seq: name of the target row; NULL selects row 0. An unknown name is fatal.\r
-   * For every column where the target row is not a gap, the row receives the\r
-   * highest digit found in that column of ST, unless it already holds a digit\r
-   * at least as high.\r
-   * ST is first brought to the layout of A by STseq2STaln. Returns A.\r
-   */\r
-  int s, a, b;\r
-\r
-  ST=STseq2STaln (A, ST);\r
-  if ( seq==NULL)s=0;\r
-  else\r
-    s=name_is_in_list ( seq, A->name, A->nseq, 100);\r
-\r
-  if (s==-1)\r
-    {\r
-      /* the format has two %s: the sequence name must be passed as well */\r
-      add_warning ( stderr, "\nERROR: %s is not in your MSA [FATAL: %s]", seq, PROGRAM);\r
-      myexit (EXIT_FAILURE);\r
-    }\r
-\r
-  for (a=0; a<A->len_aln; a++)\r
-    {\r
-      int t, r;\r
-\r
-      t=A->seq_al[s][a];\r
-      if (is_gap (t))continue;\r
-      for (b=0; b<A->nseq; b++)\r
-        {\r
-          /* re-read: the target may have been raised by a previous row */\r
-          t=A->seq_al[s][a];\r
-          r=ST->seq_al[b][a];\r
-          if ( isdigit (r))\r
-            {\r
-              if (!isdigit(t) || t<r)\r
-                A->seq_al[s][a]=r;\r
-            }\r
-        }\r
-    }\r
-  return A;\r
-}\r
- \r
- \r
-\r
-Alignment * filter_aln_convert ( Alignment *A, Alignment *ST,int use_cons, int value, int n_symbol,char **symbol_list)\r
-  {\r
-    /* Edits the residues of A selected through the structure/score alignment ST.\r
-     *\r
-     * ST         : per-residue annotation (digits are read as 0-9, any other\r
-     *              character as its code); brought to the layout of A first.\r
-     *              May be NULL.\r
-     * use_cons   : read the annotation from the row of ST named "con", "cons" or\r
-     *              "Cons" instead of the row named like the current sequence;\r
-     *              dropped with a warning when no such row exists.\r
-     * value      : annotation to select; -1 selects every position. With\r
-     *              value==10 or ST==NULL every position gets the annotation 11.\r
-     * n_symbol / symbol_list:\r
-     *              with symbol_list==NULL, n_symbol is one of UPPER, LOWER,\r
-     *              SWITCHCASE or DELETE (selected positions are upper-cased,\r
-     *              lower-cased, case-switched or turned into '-');\r
-     *              otherwise the position is passed to convert() with the\r
-     *              n_symbol entries of symbol_list.\r
-     *\r
-     * Positions annotated NO_COLOR_RESIDUE are always selected.\r
-     * A sequence of A that is absent from ST gets the annotation 11 on all its\r
-     * positions. A is modified in place and returned.\r
-     */\r
-    int a, b, c;\r
-    int st;\r
-    int cons=0;\r
-\r
-    ST=STseq2STaln (A, ST);\r
-    if ( ST && use_cons)\r
-      {\r
-        cons=name_is_in_list ("con", ST->name,ST->nseq+1, 100);\r
-        if ( cons==-1)cons=name_is_in_list ("cons", ST->name,ST->nseq+1, 100);\r
-        if ( cons==-1)cons=name_is_in_list ("Cons", ST->name,ST->nseq+1, 100);\r
-        if ( cons==-1)\r
-          {\r
-            use_cons=0;\r
-            fprintf (stderr, "WARNING: Could Not Use the Consensus Sequence [WARNING:%s]\n", PROGRAM);\r
-          }\r
-      }\r
-\r
-    A->residue_case=KEEP_CASE;\r
-    for ( a=0; a< A->nseq; a++)\r
-      {\r
-        /* row of ST matching the current sequence, -1 when there is none */\r
-        c=-1;\r
-        if(value!=10 && ST && !use_cons)\r
-          {\r
-            c=name_is_in_list (A->name[a], ST->name, ST->nseq,100);\r
-          }\r
-\r
-        for ( b=0; b< A->len_aln; b++)\r
-          {\r
-            if ( value==10 || !ST)st=11;\r
-            else if ( use_cons)\r
-              {\r
-                st=(isdigit(ST->seq_al[cons][b]))?ST->seq_al[cons][b]-'0':ST->seq_al[cons][b];\r
-              }\r
-            /* no annotation for this sequence: the original fell through\r
-               and read ST->seq_al[-1] */\r
-            else if ( c==-1)st=11;\r
-            else st=(isdigit(ST->seq_al[c][b]))?ST->seq_al[c][b]-'0':ST->seq_al[c][b];\r
-\r
-            if ( st==value || value==-1 || st==NO_COLOR_RESIDUE)\r
-              {\r
-                if ( n_symbol==UPPER && !symbol_list)A->seq_al[a][b]=toupper (A->seq_al[a][b]);\r
-                else if ( n_symbol==LOWER && !symbol_list)A->seq_al[a][b]=tolower (A->seq_al[a][b]);\r
-                else if ( n_symbol==SWITCHCASE && !symbol_list)\r
-                  {\r
-                    if ( !isalpha(A->seq_al[a][b]));\r
-                    else if (isupper (A->seq_al[a][b]))A->seq_al[a][b]=tolower (A->seq_al[a][b]);\r
-                    else if (islower (A->seq_al[a][b]))A->seq_al[a][b]=toupper (A->seq_al[a][b]);\r
-                  }\r
-                else if ( n_symbol==DELETE && !symbol_list)A->seq_al[a][b]='-';\r
-                else\r
-                  {\r
-                    A->seq_al[a][b]=convert(A->seq_al[a][b],n_symbol,symbol_list);\r
-                  }\r
-              }\r
-          }\r
-      }\r
-    return A;\r
-  }\r
-\r
-\r
-char ** sar_aln2motif (Alignment *A, Alignment *B, int *pos, int c);\r
-char ** sar_aln2motif (Alignment *A, Alignment *B, int *pos, int c)\r
-{ /* Searches, and prints on stdout, the motif that best separates two groups of rows of A.\r
-     A  : alignment; only the columns a with pos[a]!=0 are used.\r
-     B  : row b of A goes to group I when B->seq_al[b][c]=='I', to group O otherwise.\r
-     c  : column of B read for the grouping; also printed as the compound number.\r
-     Returns 0 at once when a selected column holds a gap or when no column is\r
-     selected. Otherwise every motif produced by generate_array_string_list from\r
-     the per-column alphabets of group I is scored with rates2sensitivity on the\r
-     match_motif counts (I: tp/fn, O: fp/tn), the two groups and the best motif are\r
-     printed, and NULL is returned: no motif list is ever handed back.\r
-     I and O are static work alignments copied from the A of the first call and\r
-     reused afterwards.\r
-     NOTE(review): later calls with a larger A would overrun them - confirm. */\r
- static Alignment *I;\r
- static Alignment *O;\r
- int a, b, o, i;\r
-\r
- float tp,tn,fp,fn,best, sp, sn, sen2;\r
- float best_pred=-1;\r
- int best_motif=0;\r
-\r
-\r
- int n1;\r
- static char ***alp;\r
- static int *alp_size;\r
- \r
- char ***motif_list;\r
- int n;\r
- \r
-\r
- if (!I)\r
- {\r
- I=copy_aln(A, NULL);\r
- O=copy_aln(A, NULL);\r
- }\r
- \r
-\r
- \r
- I->nseq=O->nseq=I->len_aln=O->len_aln=0;\r
- for (a=0; a<A->len_aln; a++)\r
- {\r
- if (pos[a])\r
- {\r
- for (i=o=0,b=0; b<A->nseq; b++) /* i/o: next free row of group I/O for this column */\r
- {\r
- \r
- if ( is_gap(A->seq_al[b][a]))return 0;\r
- if (B->seq_al[b][c]=='I')I->seq_al[i++][I->len_aln]=A->seq_al[b][a];\r
- else O->seq_al[o++][O->len_aln]=A->seq_al[b][a];\r
- }\r
- I->len_aln++;\r
- O->len_aln++;\r
- }\r
- }\r
- \r
- if (O->len_aln==0 || I->len_aln==0) return 0;\r
- O->nseq=o;\r
- I->nseq=i;\r
- for (a=0; a<o; a++)O->seq_al[a][O->len_aln]='\0';\r
- for (a=0; a<i; a++)I->seq_al[a][I->len_aln]='\0';\r
-\r
- alp=vcalloc ( sizeof (char**), I->len_aln); /* count and size arguments are swapped compared with the other vcalloc calls; the product is the same */\r
- alp_size= vcalloc ( I->len_aln, sizeof (int));\r
- for (a=0; a<I->len_aln; a++)\r
- {\r
- char *col;\r
- alp[a]=string2alphabet ( (col=aln_column2string (I,a)),2, &alp_size[a]);\r
- vfree (col);\r
- }\r
-\r
- \r
- \r
- motif_list=generate_array_string_list (I->len_aln, alp, alp_size, &n, NULL, OVERLAP);\r
- best_pred=best_motif=0;\r
- for (a=0; a<n; a++)\r
- {\r
- \r
- tp=tn=fp=fn=0;\r
- \r
- for (b=0; b<I->nseq; b++)\r
- {\r
- if (match_motif (I->seq_al[b], motif_list[a]))tp++;\r
- else fn++;\r
- }\r
- for (b=0; b<O->nseq; b++)\r
- {\r
- if (match_motif (O->seq_al[b], motif_list[a]))fp++;\r
- else tn++;\r
- }\r
- rates2sensitivity (tp, tn, fp, fn, &sp, &sn, &sen2, &best);\r
- \r
- if (best> best_pred) /* keeps the first motif reaching the highest score */\r
- {\r
- best_pred=best;\r
- best_motif=a;\r
- }\r
- }\r
- \r
- output_Alignment_without_header ( I, stdout);\r
- fprintf ( stdout, "\n");\r
- output_Alignment_without_header ( O, stdout);\r
- \r
- \r
- fprintf ( stdout, "\nMotifCompound %d pred: %.2f motif: ", c, best_pred);\r
- for (n1=0, a=0; a<I->len_aln; a++) \r
- {\r
- char *m;\r
- int l;\r
- m=motif_list[best_motif][a];\r
- fprintf ( stdout, "[%s]-", m);\r
- l=strlen (m);\r
- n1+=(l==1 && !strm ("*",m) )?1:0; /* n1 counts the single-character positions other than "*" */\r
- }\r
- fprintf (stdout, "SCORE: %d", n1);\r
- \r
- for (a=0; a<n; a++)vfree (motif_list[a]);\r
- vfree (motif_list);\r
- free_arrayN((void ***) alp, 3);\r
- vfree (alp_size);\r
- \r
- return NULL;\r
-}\r
-\r
-\r
-\r
-\r
-void explore_weight_matrix (Alignment *A, Alignment *B, int range, int n, int *array);\r
-void explore_weight_matrix (Alignment *A, Alignment *B, int range, int n, int *array)\r
-{\r
-  /* Recursively enumerates every weight vector of A->len_aln entries taken in\r
-   * [0,range) and prints each of them on stdout together with sar_aln2r.\r
-   * n is the entry being filled (0 on the outermost call); array is the work\r
-   * vector, of at least A->len_aln entries.\r
-   */\r
-  int k, w;\r
-\r
-  if ( n!=A->len_aln)\r
-    {\r
-      /* try every weight at position n and recurse on the next one */\r
-      for ( w=0; w<range; w++)\r
-        {\r
-          array[n]=w;\r
-          explore_weight_matrix (A, B, range, n+1, array);\r
-        }\r
-      return;\r
-    }\r
-\r
-  /* the vector is complete: report it */\r
-  fprintf ( stdout, "\n W:");\r
-  k=0;\r
-  while (k<A->len_aln)\r
-    {\r
-      fprintf ( stdout, "%d", array[k]);\r
-      k++;\r
-    }\r
-  fprintf ( stdout, " %.4f",(float)sar_aln2r(A,B,array,0));\r
-}\r
-float search_best_combo(Alignment *A, Alignment *B);\r
-void search_best_combo_sar_aln(Alignment *A, Alignment *B);\r
-void search_best_combo_sar_aln(Alignment *A, Alignment *B)\r
-{\r
-  /* Slides a window of 5 columns along B and runs search_best_combo (A, window)\r
-   * at every start position a < B->len_aln-5; a progress line is printed on\r
-   * stdout after each run. The value returned by search_best_combo is not used.\r
-   */\r
-  int a,b,c;\r
-  Alignment *S;\r
-  int w=5;\r
-\r
-  S=copy_aln (B, NULL);\r
-  S->len_aln=w;\r
-  for ( a=0; a<B->len_aln-w;a++)\r
-    {\r
-      /* S receives columns a..a+w-1 of B */\r
-      for (b=0; b<B->nseq; b++)\r
-        {\r
-          for (c=0; c<w; c++)\r
-            {\r
-              S->seq_al[b][c]=B->seq_al[b][a+c];\r
-            }\r
-          S->seq_al[b][c]='\0';\r
-        }\r
-\r
-      search_best_combo (A, S);\r
-      fprintf ( stdout,"\nP: XXXX \nP: XXXXX A=%d / %d", a, B->len_aln);\r
-    }\r
-\r
-  /* the working copy is local to this function and was never released */\r
-  free_aln (S);\r
-}\r
-\r
-/* Looks for the columns of A whose combination gets the best sar_aln2r\r
- * score against B.\r
- * combo_mode is hard-wired to 1 (greedy): at most max (=2) rounds, each\r
- * round adding the start position whose window of w (=1) columns, together\r
- * with the positions already selected, gives the highest score.\r
- * Once the selection is done, sar_aln2motif is called for every column of B\r
- * and the process exits. Everything below that exit (0), including the\r
- * final return of best_score, is therefore never executed.\r
- * NOTE(review): pos and list are allocated here and never freed.\r
- */ float search_best_combo(Alignment *A, Alignment *B)\r
-{\r
- int a, b, c, d, best_pos,nl, max;\r
- float best_score, score;\r
- int *list, *pos;\r
- \r
- int w;\r
- int combo_mode=1; //1: greedy 2: consider all the w combinations;\r
- FILE *fp2;\r
- static int **M; /* substitution matrix, read once from "blosum62mt" further down */\r
- max=2;\r
- int delta=0;\r
- w=1;\r
- \r
- pos=vcalloc ( A->len_aln, sizeof (int)); /* per-column 0/1 selection flags handed to sar_aln2r */\r
- list=vcalloc (A->len_aln, sizeof (int)); /* the nl start positions selected so far */\r
- nl=0;\r
- \r
- if ( combo_mode==1)\r
- {\r
- for (a=0; a< max; a++)\r
- {\r
- for (best_score=-9999,best_pos=0,b=0; b< A->len_aln-w; b++)\r
- {\r
- for (c=0; c<nl; c++)pos[list[c]]=1;\r
- for (c=0; c<w; c++)pos[b+c]=1;\r
- score=sar_aln2r(A,B,pos,0);\r
- if ( score>best_score)\r
- {\r
- best_score=score;\r
- best_pos=b;\r
- }\r
- for (c=0; c<w; c++)pos[b+c]=0;\r
- }\r
- if (best_pos==list[nl-1])break; /* NOTE(review): reads list[-1] on the first round, while nl is still 0 */\r
- list[nl++]=best_pos;\r
- for (b=0; b<nl; b++) pos[list[b]]=1;\r
- fprintf ( stdout, "\n%2d P: %d S:%.3f Delta= %d", nl,best_pos, best_score, (int)sar_aln2delta(A,B, pos,0));\r
- for (b=0; b<nl; b++) pos[list[b]]=0; \r
- \r
-\r
- }\r
- for (a=0; a<nl; a++) pos[list[a]]=1;\r
- fprintf ( stdout, "\nR: %3f " ,(float)sar_aln2r(A,B,pos,1));\r
- \r
- }\r
- else if ( combo_mode==2) /* not taken while combo_mode is fixed to 1 */\r
- {\r
- int *array;\r
- char *tmpf;\r
- FILE *fp;\r
- char *buf=NULL;\r
- int *preset, n_preset;\r
- \r
- tmpf=vtmpnam (NULL);\r
- max=1;\r
- generate_array_int_list (max, 0,A->len_aln-1, 1,NULL, tmpf);\r
- printf_system ( "cp %s testfile", tmpf);\r
- buf=vcalloc ( 1000, sizeof (char));\r
- fp=vfopen (tmpf, "r");\r
- best_score=-99999;\r
- \r
- n_preset=0;\r
- preset=vcalloc (A->len_aln, sizeof (int));\r
- preset[n_preset++]=353; /* hard-coded column indexes, always part of the scored set */\r
- preset[n_preset++]=361;\r
- //preset[n_preset++]=365;\r
- //preset[n_preset++]=187;\r
- //preset[n_preset++]=397;\r
- //preset[n_preset++]=492;\r
-\r
- \r
- while ( (buf=vfgets ( buf, fp))!=NULL)\r
- {\r
-\r
- array=string2num_list (buf);\r
-\r
- for (a=1; a<=max; a++)\r
- {\r
- pos[array[a]]=1;\r
- }\r
- for ( a=0; a<n_preset; a++)pos[preset[a]]=1;\r
- \r
- score=sar_aln2r(A,B,pos,0);\r
- \r
- if ( score>best_score)\r
- {\r
- best_score=score;\r
- fprintf ( stdout, "\n");\r
- for (a=0; a<n_preset; a++)fprintf (stdout, "%2d ", preset[a]);\r
- for (a=1; a<=max; a++)fprintf (stdout, "%2d ", array[a]);\r
- fprintf ( stdout, " R: %.3f", best_score);\r
- for (nl=0,a=0; a<n_preset; a++)list[nl++]=preset[a];\r
- for (a=1; a<=max; a++)list[nl++]=array[a];\r
- }\r
- //if ( score!=0)HERE ("R=%.2f", score);\r
- for (b=1; b<=max; b++)\r
- pos[array[b]]=0;\r
- vfree (array);\r
- }\r
- fprintf ( stdout, "\n");\r
- vfclose (fp);\r
- //for (a=0; a<max; a++)fprintf (stdout, "%2d ", array[best_pos][a]);\r
- //fprintf ( stdout, " R: %.3f", best_score);\r
- }\r
- for (c=0; c<B->len_aln; c++)\r
- {\r
- sar_aln2motif (A,B,pos, c);\r
- \r
- }\r
- exit (0); /* the process stops here: nothing below this line is executed */\r
- HERE ("***************");\r
- fp2=vfopen ("aln.aln", "w");\r
- for (a=0; a<A->nseq; a++)\r
- {\r
- fprintf (fp2, ">%s\n", A->name[a]);\r
- for ( b=0; b<nl; b++)fprintf (fp2, "%c", A->seq_al[a][list[b]]);\r
- fprintf ( fp2, "\n");\r
- }\r
- vfclose (fp2);\r
- HERE ("Output aln.aln");\r
- if (1)\r
- {\r
- float tp=0, tn=0, fp=0, fn=0, pp2=0,pp=0, sn,sn2, sp;\r
- int **result,**result2,**compound_score, *ref_score,n2,n, s, p, c;\r
- Alignment *AI, *AO;\r
- int simI, simO;\r
- \r
- compound_score=declare_int (B->len_aln, 2);\r
- ref_score=vcalloc (nl, sizeof (int));\r
- \r
- result=declare_int (B->len_aln*A->nseq*A->nseq, 2);\r
- result2=declare_int (B->len_aln*A->nseq*A->nseq, 2);\r
- \r
- for (n2=c=0; c< B->len_aln; c++)\r
- {\r
- \r
- int sar1, sar2;\r
- pp=tp=tn=fp=fn=0;\r
- if (!M)M=read_matrice ("blosum62mt");\r
- for (n=0,a=0; a<A->nseq-1; a++)\r
- {\r
- for (b=a+1; b<A->nseq;b++)\r
- {\r
- for (s=0,p=0; p<nl; p++)\r
- {\r
- char r1, r2;\r
- \r
- r1=A->seq_al[a][list[p]];\r
- r2=A->seq_al[b][list[p]];\r
- if ( !is_gap (r1) && !is_gap(r2))s+=M[r1-'A'][r2-'A'];\r
- }\r
- result2[n2][0]=result[n][0]=s;\r
- \r
- sar1=B->seq_al[a][c];sar2=B->seq_al[b][c];\r
- \r
- if (sar1=='I' && sar1==sar2)\r
- {\r
- result2[n2][1]=result[n][1]=1;\r
- pp++;pp2++;\r
- n++;n2++;\r
- }\r
- else if ( sar1==sar2 && sar1=='O')\r
- {\r
- ;\r
- }\r
- else\r
- {\r
- result2[n2][1]=result[n][1]=0;\r
- n++;n2++;\r
- }\r
- //else if ( s1==s2=='O')result[n][1]=-1;\r
- }\r
- }\r
- \r
- if (pp==0)continue;\r
- sort_int_inv (result, 2, 0, 0, n-1);\r
- \r
- \r
- for (tp=0,a=0; a<n; a++)\r
- {\r
- tp+=result[a][1];\r
- if ((pp-tp) == (a-tp))break;\r
- }\r
- fp=a-tp;\r
- fn=pp-tp;\r
- tn=n-pp;\r
- \r
- sn=(tp/(tp+fn));\r
- sn2=(tp/(tp+fp));\r
- sp=(tn/(tn+fp));\r
- fprintf ( stdout, "\nCompound %3d sn: %.3f sn2: %.3f sp: %.3f MIN: %.3f",c,sn, sn2,sp, MIN((MIN(sn,sn2)),sp));\r
- compound_score[c][0]=c;\r
- compound_score[c][1]=1000*MIN((MIN(sn,sn2)),sp);\r
- }\r
- \r
- sort_int_inv (compound_score,2, 1, 0, B->len_aln-1);\r
-\r
- fp2=vfopen ("compound.fasta", "w");\r
- for (d=0; d<nl; d++)\r
- {\r
- int r1, r2;\r
- for (n=0,a=0;a<A->nseq; a++)\r
- for (b=0; b<A->nseq; b++)\r
- {\r
- r1= A->seq_al[b][list[d]];\r
- r2= A->seq_al[b][list[d]]; /* NOTE(review): r1 and r2 are both read from row b, row a is not used */\r
- if (is_gap(r1) || is_gap(r2))continue;\r
- else \r
- {\r
- ref_score[d]+=M[r1-'A'][r2-'A'];\r
- n++;\r
- }\r
- }\r
- ref_score[d]/=n;\r
- }\r
- AO=copy_aln (A, NULL);\r
- AI=copy_aln (A,NULL);\r
- AO->len_aln=AI->len_aln=nl;\r
- for (a=0; a<A->nseq; a++)AO->seq_al[a][nl]=AI->seq_al[a][nl]='\0';\r
- \r
- for (a=0; a<B->len_aln; a++)\r
- {\r
- fprintf (stdout, "\n>%4d %4d ", compound_score[a][0], compound_score[a][1]);\r
- for (b=0; b<B->nseq; b++) fprintf (stdout, "%c", B->seq_al[b][compound_score[a][0]]);\r
- fprintf ( stdout, "\n");\r
- \r
- for (AI->nseq=0,b=0; b<B->nseq; b++)\r
- {\r
- if (B->seq_al[b][compound_score[a][0]]=='O')continue;\r
- fprintf ( stdout, "\n\t");\r
- for (c=0; c<nl; c++)\r
- {\r
- fprintf ( stdout, "%c", A->seq_al[b][list[c]]);\r
- AI->seq_al[AI->nseq][c]=A->seq_al[b][list[c]];\r
- }\r
- AI->nseq++;\r
- }\r
- fprintf ( stdout, "\n\t");\r
- for (d=0; d<nl; d++)\r
- {\r
- for (score=0,n=0,b=0; b<B->nseq; b++)\r
- {\r
- if (B->seq_al[b][compound_score[a][0]]=='O')continue;\r
- for (c=0; c<B->nseq; c++)\r
- {\r
- if (B->seq_al[c][compound_score[a][0]]=='O')continue;\r
- {\r
- int r1, r2;\r
- \r
- r1= A->seq_al[b][list[d]];\r
- r2= A->seq_al[b][list[d]]; /* NOTE(review): both residues come from row b, row c is not used */\r
- if (is_gap(r1) || is_gap(r2))continue;\r
- else score+=M[r1-'A'][r2-'A'];\r
- n++;\r
- }\r
- }\r
- }\r
- score/=n;\r
- if ((float)score/(float)ref_score[d]>1.2)fprintf ( stdout, "*");\r
- else fprintf ( stdout, " ");\r
- }\r
- for (AO->nseq=0,b=0; b<B->nseq; b++)\r
- {\r
- if (B->seq_al[b][compound_score[a][0]]=='I')continue;\r
- fprintf ( stdout, "\n\t");\r
- for (c=0; c<nl; c++)\r
- {\r
- AO->seq_al[AO->nseq][c]=A->seq_al[b][list[c]];\r
- fprintf ( stdout, "%c", A->seq_al[b][list[c]]);\r
- }\r
- AO->nseq++;\r
- }\r
- simI=aln2sim (AI, "blosum62mt"); simO=aln2sim (AO, "blosum62mt");\r
- fprintf ( stdout, "\nDELTA: I: %d O: %d %d",simI,simO, simI-simO);\r
- delta+=simI-simO;\r
- }\r
-\r
- for ( a=0; a<B->nseq; a++)\r
- {\r
-\r
- fprintf ( fp2, ">%s\n", B->name[a]);\r
- for (b=0; b<B->len_aln/2; b++)\r
- fprintf ( fp2, "%c", B->seq_al[a][compound_score[b][0]]);\r
- fprintf (fp2, "\n");\r
- }\r
- vfclose (fp2);\r
- HERE ("OUTPUT compound.fasta");\r
- result=result2;\r
- n=n2;\r
- pp=pp2;\r
- \r
- sort_int_inv (result, 2, 0, 0, n-1);\r
- \r
- \r
- for (tp=0,a=0; a<n; a++)\r
- {\r
- tp+=result[a][1];\r
- if ((pp-tp) == (a-tp))break;\r
- }\r
- fp=a-tp;\r
- fn=pp-tp;\r
- tn=n-pp;\r
- \r
- sn=(tp/(tp+fn));\r
- sn2=(tp/(tp+fp));\r
- sp=(tn/(tn+fp));\r
- fprintf ( stdout, "\nTOT: sn: %.3f sn2: %.3f sp: %.3f MIN: %.3f",sn, sn2,sp, MIN((MIN(sn,sn2)),sp));\r
- \r
- }\r
- HERE ("Delta= %d", delta);\r
- \r
- \r
- /*\r
- C=copy_aln(A, NULL);\r
- for (a=0; a< nl; a++)\r
- for (b=0; b<A->nseq; b++)\r
- C->seq_al[b][a]=A->seq_al[b][list[a]];\r
- C->len_aln=nl;\r
- array=vcalloc (C->len_aln, sizeof (int));\r
- explore_weight_matrix (C, B, 6,0, array);\r
- */\r
- \r
- return best_score;\r
-}\r
-\r
-\r
-/* Exploratory driver: runs search_best_combo (A,B) and then exits the\r
- * process. Every statement after that first exit (0) is unreachable; the\r
- * remaining sections are earlier experiments (window scans with hard-coded\r
- * column indexes, exhaustive 6-position scan, dipeptide listing, and a\r
- * similarity/score correlation through return_r), each ending in exit (0).\r
- */ void count_misc (Alignment *A, Alignment *B)\r
-{\r
- int **done, a, b, c, d, e,f, g, *list, n, score;\r
- double **slist, *r;\r
- int *pos;\r
- int w=1;\r
- \r
- search_best_combo (A,B);\r
- exit (0); /* the process stops here: nothing below this line is executed */\r
- pos=vcalloc (A->len_aln+1, sizeof (int));\r
- /*\r
- pos[354]=1;\r
- pos[362]=1;\r
- pos[366]=1;\r
- pos[398]=1;\r
- pos[476]=1;\r
- \r
- \r
- fprintf ( stdout, "\nR: %3f " ,(float)sar_aln2r(A,B,pos,1));exit (0);\r
- */\r
- for (a=0; a< A->len_aln-w; a++)\r
- {\r
- for (c=0; c<w; c++)\r
- {\r
- pos[a+c]=1;\r
- }\r
- pos[398]=1; /* hard-coded column indexes; NOTE(review): no check against A->len_aln */\r
- pos[362]=1;\r
- pos[354]=1;\r
- pos[366]=1;\r
- pos[419]=1;\r
- pos[494]=1;\r
- pos[476]=1;\r
- pos[337]=1;\r
- fprintf ( stdout, "\nP: %3d W:2 R: %3f ",a+1, (float)sar_aln2r(A,B,pos,0));\r
- for (c=0; c<w; c++)\r
- {\r
- pos[a+c]=0;\r
- }\r
- }\r
- \r
- exit (0);\r
- for (a=0; a<w; a++) pos[a]=1;\r
- for (a=w; a< A->len_aln-1; a++)\r
- {\r
- pos[a-w]=0;\r
- pos[a]=1;\r
- fprintf ( stdout, "\nP: %3d W:2 R: %3f ",a, (float)sar_aln2r(A,B,pos,0));\r
- }\r
- \r
- exit (0);\r
- pos[2]=1;\r
- pos[3]=1;\r
- \r
- \r
- \r
- explore_weight_matrix (A, B,3, 0,pos);\r
- exit (0);\r
-\r
- for (a=0; a<A->len_aln; a++)\r
- for ( b=0; b<A->len_aln; b++)\r
- for (c=0; c<A->len_aln; c++)\r
- for (d=0; d<A->len_aln; d++)\r
- for (f=0; f<A->len_aln; f++)\r
- for (g=0; g<A->len_aln; g++)\r
- {\r
- e=0;\r
- pos[e++]=a;\r
- pos[e++]=b;\r
- pos[e++]=c;\r
- pos[e++]=d;\r
- pos[e++]=f;\r
- pos[e++]=g;\r
- pos[e++]=-1;\r
- fprintf ( stdout, "\n%d %d %d %d %d %d %.3f", a, b,c,d,f, g, sar_aln2r(A,B, pos,0));\r
- \r
- }\r
- \r
- exit (0);\r
-\r
-\r
- slist=declare_double (A->nseq*A->nseq*10, 2);\r
- done=declare_int (256, 256);\r
- list=vcalloc ( A->nseq, sizeof (int));\r
- \r
- for (a=0; a<A->len_aln-1; a++)\r
- {\r
- for (b =0; b<256; b++)for (c=0; c<256; c++)done[b][c]=0;\r
- \r
- for (b=0; b<A->nseq-1; b++)\r
- {\r
- int r1, r2;\r
- r1=A->seq_al[b][a];\r
- r2=A->seq_al[b][a+1];\r
- if (done[r1][r2])continue;\r
- n=0;\r
- done[r1][r2]=1;\r
- list[n++]=b;\r
- fprintf ( stdout, "\n%3d %c%c: %s ",a+1, r1, r2, A->name[b]);\r
- for ( c=b+1; c<A->nseq; c++)\r
- {\r
- if (r1==A->seq_al[c][a] && r2==A->seq_al[c][a+1])\r
- {\r
- fprintf ( stdout, "%s ", A->name[c]);\r
- list[n++]=c;\r
- }\r
-\r
- }\r
- if (B && n>1)\r
- {\r
- for (e=0,score=0,c=0; c<n-1; c++)\r
- for (d=c+1; d<n; d++,e++)\r
- score+=get_sar_sim2(B->seq_al[list[c]], B->seq_al[list[d]]);\r
- fprintf ( stdout, " Score=%d", score/e);\r
- }\r
- }\r
- }\r
- for (score=0,e=0,a=0; a<A->nseq-1; a++)\r
- for (b=a+1; b<A->nseq; b++,e++)\r
- {\r
- score+=get_sar_sim2(B->seq_al[a], B->seq_al[b]);\r
- }\r
- fprintf (stdout,"AVG=%d", score/e);\r
- for (n=0,a=0; a< A->nseq-1; a++)\r
- {\r
- static int **M;\r
- int sim;\r
- if (!M)M=read_matrice ("blosum62mt");\r
- \r
- \r
- for (b=a+1; b<A->nseq; b++)\r
- {\r
- int n11, n01, n10, n00, n1;\r
- \r
- for (sim=d=0;d<A->len_aln; d++)\r
- {\r
- int r1, r2;\r
- r1=A->seq_al[a][d];\r
- r2=A->seq_al[b][d];\r
- sim+=(r1==r2)?1:0;\r
- //sim +=(M[r1-'A'][r2-'A']>0)?1:0;\r
- }\r
- \r
- sim=(100*sim)/(A->len_aln);//+rand()%10;\r
- for (n1=n00=n11=n10=n01=score=0, d=0; d<B->len_aln; d++)\r
- {\r
- int r1, r2;\r
- r1=B->seq_al[a][d];\r
- r2=B->seq_al[b][d];\r
- n11+=(r1=='I' && r2=='I');\r
- n00+=(r1=='O' && r2=='O');\r
- n10+=(r1=='I' && r2=='0'); /* NOTE(review): compares with the digit '0' while the other tests use the letter 'O' */\r
- n01+=(r1=='O' && r2=='I');\r
- n1+=(r1=='I' || r2=='I');\r
- }\r
- score =((n11+n00)*100)/B->len_aln;\r
- \r
- //score=get_sar_sim2(B->seq_al[a], B->seq_al[b]);\r
- \r
- fprintf ( stdout, "\nSIM: %d SC: %d", sim, score);\r
- slist[n][0]=(double)sim;\r
- slist[n][1]=(double)score;\r
- n++;\r
- }\r
- }\r
- r=return_r(slist, n);\r
- fprintf ( stdout, "\nR= %.4f", (float)r[0]);\r
- exit (0);\r
-}\r
-\r
-/* Sums is_gap () over every cell of the A->nseq x A->len_aln alignment and\r
- * returns the total.\r
- */\r
-int aln2ngap ( Alignment *A)\r
-{\r
-  int col, row;\r
-  int total=0;\r
-\r
-  col=0;\r
-  while (col < A->len_aln)\r
-    {\r
-      for (row=0; row < A->nseq; row++)\r
-        total+=is_gap (A->seq_al[row][col]);\r
-      col++;\r
-    }\r
-  return total;\r
-}\r
-/* Accumulates, for each of the n_symbol sets of symbol_list, the result of\r
- * is_in_set () for the residues of A found at positions whose code in ST\r
- * equals value.\r
- *   ST     : optional alignment of per-position codes, matched to A by\r
- *            sequence name; a digit character is read as its numeric value,\r
- *            any other character as its character code\r
- *   value  : code to select; -1 selects every position. With value==10, or\r
- *            without ST, every position gets the code 11\r
- *   table  : optional accumulator of n_symbol ints, allocated when NULL\r
- * Returns table. A->residue_case is set to KEEP_CASE as a side effect.\r
- * A sequence of A without a namesake in ST is handled like the no-ST case\r
- * (code 11) instead of reading past the last row of ST.\r
- */\r
-int * count_in_aln ( Alignment *A, Alignment *ST, int value, int n_symbol,char **symbol_list, int *table)\r
- {\r
-   int a, b, c, d;\r
-   int st;\r
-\r
-   if (!table)table=vcalloc (n_symbol, sizeof (int));\r
-\r
-   A->residue_case=KEEP_CASE;\r
-   for ( a=0; a< A->nseq; a++)\r
-     {\r
-       char *st_row=NULL;\r
-\r
-       if (value!=10 && ST)\r
-         {\r
-           for ( c=0; c< ST->nseq; c++)\r
-             if ( strm(ST->name[c], A->name[a])){st_row=ST->seq_al[c];break;}\r
-         }\r
-\r
-       for ( b=0; b< A->len_aln; b++)\r
-         {\r
-           if (!st_row)st=11;\r
-           /* isdigit () needs a value representable as unsigned char */\r
-           else st=(isdigit((unsigned char)st_row[b]))?st_row[b]-'0':st_row[b];\r
-\r
-           if ( st==value || value==-1)\r
-             {\r
-               for ( d=0; d<n_symbol; d++)table[d]+=is_in_set ( A->seq_al[a][b], symbol_list[d]);\r
-             }\r
-         }\r
-     }\r
-   return table;\r
- }\r
-\r
-/* Builds a consensus string for rows 0 and 1 of A, keeping per-column\r
- * a/g/c/t/internal-gap counts in static tables that persist between calls\r
- * (column_count, old_tot_count, new_tot_count, string1, string2, first).\r
- * For each column the counts are mirrored into (A->P)->count and the most\r
- * frequent of a/g/c/t, as picked by best_int, is written to the result.\r
- * The longest run of identical characters between the two rows is reported\r
- * on stderr. Returns a newly allocated string of A->len_aln characters.\r
- * NOTE(review): the function assumes A holds at least two rows - confirm.\r
- */ char *dna_aln2cons_seq ( Alignment *A)\r
- {\r
- int a, b, best;\r
- static int **column_count;\r
- static int **old_tot_count;\r
- static int **new_tot_count;\r
- static char *string1, *string2;\r
- int **count_buf;\r
- char r1, r2,*seq;\r
- int NA=0, NG=1, NC=2, NT=3, IGAP=4; /* slot of each symbol inside a 5-int column counter */\r
- static int MAX_EST_SIZE=10000;\r
- static int size_increment=1000;\r
- static int first; /* 0 during the first call, 1 afterwards */\r
- int overlap=0, best_overlap=0;\r
- \r
-\r
- seq=vcalloc ( A->len_aln+1, sizeof (char));\r
-\r
- if (!column_count )\r
- {\r
- column_count=vcalloc(MAX_EST_SIZE, sizeof (int*));\r
- for ( a=0; a< MAX_EST_SIZE; a++)\r
- column_count[a]=vcalloc (5, sizeof (int));\r
- \r
- old_tot_count=vcalloc(MAX_EST_SIZE, sizeof (int*));\r
- new_tot_count=vcalloc(MAX_EST_SIZE, sizeof (int*));\r
- A->P=declare_profile( "agct-",MAX_EST_SIZE); /* the profile is only created in this first-call branch */\r
- string1=vcalloc (MAX_EST_SIZE, sizeof (char));\r
- string2=vcalloc (MAX_EST_SIZE, sizeof (char));\r
- }\r
- else if (A->len_aln>MAX_EST_SIZE) /* grow path; NOTE(review): not evaluated on the very first call */\r
- {\r
- if ( column_count)\r
- {\r
- for ( a=0; a< MAX_EST_SIZE; a++)\r
- vfree(column_count[a]);\r
- vfree(column_count);\r
- vfree(old_tot_count);\r
- vfree(new_tot_count);\r
- vfree(string1);\r
- vfree(string2);\r
- }\r
- \r
- column_count=vcalloc(MAX_EST_SIZE+ size_increment, sizeof (int*));\r
- for ( a=0; a< MAX_EST_SIZE+ size_increment; a++)\r
- column_count[a]=vcalloc (5, sizeof (int));\r
- \r
- old_tot_count=vcalloc(MAX_EST_SIZE+ size_increment, sizeof (int*));\r
- new_tot_count=vcalloc(MAX_EST_SIZE+ size_increment, sizeof (int*));\r
- \r
- for (a=0; a< MAX_EST_SIZE; a++)\r
- {\r
- old_tot_count[a]=*(column_count++); /* takes the next free counter; this moves the static pointer forward */\r
- for ( b=0; b<5; b++)old_tot_count[a][b]=(A->P)->count[b][a];\r
- }\r
- free_int ( (A->P)->count, -1);\r
- \r
- (A->P)->count=declare_int (5, MAX_EST_SIZE+ size_increment);\r
- (A->P)->max_len=MAX_EST_SIZE+ size_increment;\r
- MAX_EST_SIZE+= size_increment;\r
- string1=vcalloc (MAX_EST_SIZE, sizeof (char));\r
- string2=vcalloc (MAX_EST_SIZE, sizeof (char));\r
- }\r
- \r
- \r
- sprintf ( string1, "%s",A->seq_al[0]);\r
- sprintf ( string2, "%s",A->seq_al[1]);\r
- \r
-\r
- string1=mark_internal_gaps(string1,'.');\r
- string2=mark_internal_gaps(string2,'.');\r
-\r
- \r
- \r
- for (b=0,a=0; a< A->len_aln; a++)\r
- {\r
- r1=string1[a];\r
- r2=string2[a];\r
- \r
- if ( r1==r2)\r
- {\r
- overlap++;\r
- }\r
- else\r
- {\r
- best_overlap=MAX(overlap, best_overlap);\r
- overlap=0;\r
- }\r
-\r
-\r
- if (!is_gap(r1) && first==1)new_tot_count[a]=old_tot_count[b++]; \r
- else if (is_gap(r1) || first==0){new_tot_count[a]=*column_count;column_count++;}; /* NOTE(review): advances the static column_count pointer itself, see the vfree calls in the grow path */\r
- \r
- if ( first==0) /* row 0 is only counted during the first call */\r
- {\r
- if(r1=='a') new_tot_count[a][NA]++;\r
- else if ( r1=='g')new_tot_count[a][NG]++;\r
- else if ( r1=='c')new_tot_count[a][NC]++;\r
- else if ( r1=='t')new_tot_count[a][NT]++; \r
- else if (is_gap(r1));\r
- else\r
- {\r
- new_tot_count[a][NA]++;\r
- new_tot_count[a][NG]++;\r
- new_tot_count[a][NC]++;\r
- new_tot_count[a][NT]++;\r
- }\r
- }\r
- if ( a> 0 && a<A->len_aln-1 && r1=='.')\r
- {\r
- new_tot_count[a][IGAP]+=((new_tot_count[a-1][NA]+new_tot_count[a-1][NG]+new_tot_count[a-1][NC]+new_tot_count[a-1][NT]));\r
- }\r
- \r
-\r
- if(r2=='a') new_tot_count[a][NA]++;\r
- else if ( r2=='g')new_tot_count[a][NG]++;\r
- else if ( r2=='c')new_tot_count[a][NC]++;\r
- else if ( r2=='t')new_tot_count[a][NT]++;\r
- else if ( r2=='.')new_tot_count[a][IGAP]++;\r
- else if ( r2=='-');\r
- else \r
- {\r
- new_tot_count[a][NA]++;\r
- new_tot_count[a][NG]++;\r
- new_tot_count[a][NC]++;\r
- new_tot_count[a][NT]++; \r
- }\r
- (A->P)->count[0][a]=new_tot_count[a][NA];\r
- (A->P)->count[1][a]=new_tot_count[a][NG];\r
- (A->P)->count[2][a]=new_tot_count[a][NC];\r
- (A->P)->count[3][a]=new_tot_count[a][NT];\r
- (A->P)->count[4][a]=new_tot_count[a][IGAP];\r
-\r
- best_int(4,1, &best,new_tot_count[a][NA], new_tot_count[a][NG],new_tot_count[a][NC],new_tot_count[a][NT]); \r
- if( best==0) seq[a]='a';\r
- else if ( best==1)seq[a]='g';\r
- else if ( best==2)seq[a]='c';\r
- else if ( best==3)seq[a]='t';\r
- }\r
-\r
- first=1;\r
-\r
- seq[a]='\0';\r
- fprintf ( stderr, "[Best Overlap: %d Residues]", best_overlap);\r
- count_buf=old_tot_count; /* swap the two tables: this call's counts become the old ones of the next call */\r
- old_tot_count=new_tot_count;\r
- new_tot_count=count_buf;\r
-\r
- return seq;\r
- \r
- }\r
-\r
-/* Majority consensus of the rows ls[0..ns-1] of A: for each column the\r
- * lower-cased character that is not a gap and has the highest count is kept\r
- * (the first one to reach that count wins). A column holding only gaps\r
- * yields the character 0. n_groups and group_list are not used.\r
- *   ns==0 : every sequence of A is used and ls is ignored (it may be NULL)\r
- * The length is taken from the first listed row.\r
- * Returns a newly allocated, NUL-terminated string.\r
- */\r
-char *aln2cons_maj ( Alignment *A, int ns, int *ls, int n_groups, char **group_list)\r
- {\r
-   char *seq;\r
-   int a, b;\r
-   int len;\r
-   int clean_ls=0;\r
-   static int *aa;\r
-\r
-   if ( !aa) aa=vcalloc (1000, sizeof (int));\r
-\r
-   if ( ns==0)\r
-     {\r
-       ns=A->nseq;\r
-       ls=vcalloc ( A->nseq, sizeof (int));\r
-       for ( a=0; a< A->nseq; a++)ls[a]=a;\r
-       clean_ls=1;\r
-     }\r
-\r
-   /* ls is only dereferenced once the default list is in place, so that\r
-      ns==0 also works with ls==NULL */\r
-   len=(ns>0)?(int)strlen (A->seq_al[ls[0]]):0;\r
-   seq=vcalloc (len+1, sizeof (char));\r
-\r
-   for ( a=0; a<len; a++)\r
-     {\r
-       int best_s=0, best_aa=0, r;\r
-\r
-       for (b=0; b< ns; b++)\r
-         {\r
-           /* the unsigned char cast keeps the index into aa non-negative */\r
-           r=tolower((unsigned char)A->seq_al[ls[b]][a]);\r
-           aa[r]++;\r
-           if (!is_gap(r) && aa[r]>best_s)\r
-             {\r
-               best_s=aa[r];\r
-               best_aa=r;\r
-             }\r
-         }\r
-       seq[a]=best_aa;\r
-\r
-       /* reset only the counters touched by this column */\r
-       for (b=0; b< ns; b++)\r
-         {\r
-           aa[tolower((unsigned char)A->seq_al[ls[b]][a])]=0;\r
-         }\r
-     }\r
-   seq[len]='\0';\r
-   if ( clean_ls)vfree(ls);\r
-\r
-   return seq;\r
- }\r
-\r
-/* Group-based consensus of the rows ls[0..ns-1] of A. Every residue that is\r
- * not a gap adds one, after lower-casing, to each group of group_list that\r
- * contains it; the column receives the first character of the group with\r
- * the highest count (the earliest such group on ties).\r
- *   ns==0            : every sequence of A is used and ls is ignored (it\r
- *                      may be NULL)\r
- *   group_list==NULL : 26 single-letter groups 'a'..'z' are used\r
- * The length is taken from the first listed row.\r
- * Returns a newly allocated, NUL-terminated string.\r
- */\r
-char *aln2cons_seq ( Alignment *A, int ns, int *ls, int n_groups, char **group_list)\r
- {\r
-   char *seq;\r
-   int a, b, c;\r
-   int best_group=0;\r
-   int aa_group=0;\r
-   int *group;\r
-   int len;\r
-   int clean_ls=0;\r
-\r
-   if ( ns==0)\r
-     {\r
-       ns=A->nseq;\r
-       ls=vcalloc ( A->nseq, sizeof (int));\r
-       for ( a=0; a< A->nseq; a++)ls[a]=a;\r
-       clean_ls=1;\r
-     }\r
-\r
-   /* ls is only dereferenced once the default list is in place, so that\r
-      ns==0 also works with ls==NULL */\r
-   len=(ns>0)?(int)strlen (A->seq_al[ls[0]]):0;\r
-   seq=vcalloc (len+1, sizeof (char));\r
-\r
-   if ( !group_list)\r
-     {\r
-       group_list=declare_char ( 26, 2);\r
-       for ( a=0; a<26; a++)group_list[a][0]=a+'a';\r
-       n_groups=26;\r
-       aa_group=1;\r
-     }\r
-\r
-   for ( a=0; a<len; a++)\r
-     {\r
-       group=vcalloc (n_groups+1, sizeof (int));\r
-       for (best_group=0,b=0; b< ns; b++)\r
-         {\r
-           if ( !is_gap(A->seq_al[ls[b]][a]))\r
-             {\r
-               for (c=0; c< n_groups; c++)\r
-                 if ( is_in_set (tolower((unsigned char)A->seq_al[ls[b]][a]), group_list[c]))\r
-                   {\r
-                     group[c]++;\r
-                     best_group=(group[c]>group[best_group])?c:best_group;\r
-                   }\r
-             }\r
-         }\r
-       seq[a]=group_list[best_group][0];\r
-       vfree (group);\r
-     }\r
-   seq[len]='\0';\r
-   if ( aa_group) free_char (group_list, -1);\r
-\r
-   if ( clean_ls)vfree(ls);\r
-\r
-   return seq;\r
- }\r
-\r
-Alignment *aln2conservation ( Alignment *A, int threshold,char *seq)\r
-{\r
- int a, b, c, d, i, c1, c2;\r
- int *pos;\r
- float *eval;\r
- float tot=0;\r
- float tn=0;\r
- int **sim;\r
- int w=0;\r
- \r
- pos =vcalloc (A->len_aln, sizeof (int));\r
- eval=vcalloc (A->len_aln, sizeof (int));\r
- sim=aln2sim_mat (A, "idmat");\r
- if (seq)i=name_is_in_list (seq, A->name, A->nseq, 100);\r
- else i=0;\r
- \r
- if ( i==-1) {HERE ("%s is an unknown:sequence [FATAL]"); exit (EXIT_FAILURE);}\r
- \r
- for (a=0; a<A->len_aln; a++)\r
- {\r
- double s;\r
- int e;\r
- for (c=0,e=a-w; e<=a+w; e++)\r
- {\r
- if (e<0 || e==A->len_aln)continue;\r
- c1=toupper (A->seq_al[i][e]);\r
- for (b=0; b<A->nseq; b++)\r
- {\r
- c2=toupper (A->seq_al[b][a]);\r
- if (c1==c2)\r
- { \r
- c++;\r
- s=(double)((double)sim[i][b]/(double)(100));\r
- \r
- }\r
- else\r
- {\r
- s=(double)(((double)100-(double)sim[i][b])/(double)(100));\r
- }\r
- eval[a]+=(s==0)?0:log(s);\r
- }\r
- }\r
- pos[a]=(c*100)/A->nseq;\r
- if (!is_gap(c1)){tot+=pos[a]; tn++;}\r
- \r
- if (pos[a]>=threshold)A->seq_al[i][a]=toupper (A->seq_al[i][a]);\r
- else A->seq_al[i][a]=tolower (A->seq_al[i][a]);\r
- }\r
- fprintf (stdout, ">%s %s [i=%d]\n%s\n", A->name[i],A->aln_comment[i],i, A->seq_al[i]);\r
- tot=(tn>0)?(float)tot/(float)tn:0;\r
- \r
- for (d=0,a=0; a<A->len_aln; a++)\r
- {\r
- fprintf (stdout, "# %c %4d", A->seq_al[i][a],pos[a]);\r
-\r
- \r
- if ( !is_gap (A->seq_al[i][a]))\r
- {\r
- fprintf (stdout, " LogOdd: %6.2f ", (tot==0 || pos[a]==0)?0:(float)log((float)pos[a]/tot));\r
- fprintf ( stdout, " Pos: %5d E-Val: %9.2f", ++d, eval[a]/(A->nseq));\r
- }\r
- fprintf ( stdout, "\n");\r
- }\r
- fprintf ( stdout, "#average conservation: %.2f", tot);\r
- exit (EXIT_SUCCESS);\r
-}\r
-/* Consensus of all the sequences of A under the matrix mat_name: forwards\r
- * to sub_aln2cons_seq_mat with A->nseq rows and no index list.\r
- */\r
-char *aln2cons_seq_mat ( Alignment *A, char *mat_name)\r
-{\r
-  char *consensus;\r
-\r
-  consensus=sub_aln2cons_seq_mat (A, A->nseq, NULL, mat_name);\r
-  return consensus;\r
-}\r
-/* Same as sub_aln2cons_seq_mat, with the rows designated by name: the ns\r
- * names of ls are turned into indexes by name_array2index_array, and that\r
- * temporary index list is released before returning the consensus.\r
- */\r
-char *sub_aln2cons_seq_mat2 ( Alignment *A,int ns, char **ls, char *mat_name)\r
-{\r
-  int *index=name_array2index_array(ls, ns, A->name, A->nseq);\r
-  char *result=sub_aln2cons_seq_mat ( A,ns, index, mat_name);\r
-\r
-  vfree (index);\r
-  return result;\r
-}\r
-\r
-/* Matrix-based consensus: for each column, every one of the first 20\r
- * symbols of AA_ALPHABET is scored as the sum of its matrix values against\r
- * the residues of the column that are not gaps, and the best scoring symbol\r
- * is kept (the earliest one on ties). A column without any counted residue\r
- * ends up as '-'.\r
- *   ls : row indexes to use (entries equal to -1 are skipped), or NULL to\r
- *        use the rows 0..ns-1\r
- * The matrix is read with read_matrice (mat_name) and released before\r
- * returning. Returns a newly allocated, NUL-terminated string.\r
- */\r
-char *sub_aln2cons_seq_mat ( Alignment *A,int ns, int *ls, char *mat_name)\r
-{\r
-  int **mat=read_matrice (mat_name);\r
-  int first_row=(ls==NULL)?0:ls[0];\r
-  int len=strlen ( A->seq_al[first_row]);\r
-  char *cons=vcalloc (len+1, sizeof (char));\r
-  int best_score=0, best_r=0;\r
-  int col, k, n;\r
-\r
-  for (col=0; col<len; col++)\r
-    {\r
-      for (k=0; k<20; k++)\r
-        {\r
-          char cand=AA_ALPHABET[k];\r
-          int total=0, counted=0;\r
-\r
-          for (n=0; n<ns; n++)\r
-            {\r
-              int row;\r
-              char res;\r
-\r
-              if (ls && ls[n]==-1)continue;\r
-              row=(ls)?ls[n]:n;\r
-              if (is_gap(A->seq_al[row][col]))continue;\r
-\r
-              res=A->seq_al[row][col];\r
-              counted++;\r
-              total+=mat[cand-'A'][res-'A'];\r
-            }\r
-\r
-          if (!counted)best_r='-';\r
-          if (k==0 || total>best_score)\r
-            {\r
-              best_score=total;\r
-              best_r=cand;\r
-            }\r
-        }\r
-      cons[col]=best_r;\r
-    }\r
-  free_int (mat, -1);\r
-  return cons;\r
-}\r
-\r
-/* Writes into file the input of the external method M for the sequences\r
- * designated by list. Returns 0 when S is NULL, the result of\r
- * seq_list2fasta_file when the first letter of M->seq_type is 's' (any\r
- * case), and 1 otherwise.\r
- * In the other modes the list is parsed by string2num_list: slot 0 is used\r
- * as the upper bound and the sequence indexes are read from slot 2 onwards.\r
- * For 'p', 'r' and 'g' the P, R or G template of each sequence is selected\r
- * and the content of its template file is copied to file; in 'r' mode a\r
- * sequence without template is written as a FASTA record instead. A\r
- * TARGET_SEQ_NAME line listing the sequence names closes the file.\r
- */\r
-int seq_list2in_file ( TC_method *M, Sequence *S, char *list, char *file)\r
-{\r
-  X_template *T=NULL;\r
-  FILE *out, *in;\r
-  int *slist;\r
-  int mode, n, k, s, ch;\r
-\r
-  if ( !S)return 0;\r
-\r
-  mode=tolower(M->seq_type[0]);\r
-  if (mode=='s')return seq_list2fasta_file ( S, list, file);\r
-\r
-  out=vfopen ( file, "w");\r
-  slist=string2num_list (list);\r
-  n=slist[0];\r
-\r
-  if (strlen (M->seq_type) >1)\r
-    {\r
-      add_warning( stderr, "\nERROR: Mixed seq_type not supported for external methods\n[FATAL:%s]", PROGRAM);\r
-    }\r
-\r
-  for (k=2; k<n; k++)\r
-    {\r
-      s=slist[k];\r
-      switch (mode)\r
-        {\r
-        case 'p': T=(S->T[s])->P; break;\r
-        case 'r': T=(S->T[s])->R; break;\r
-        case 'g': T=(S->T[s])->G; break;\r
-        default : break; /* T keeps the value it already had */\r
-        }\r
-\r
-      if (!T)\r
-        {\r
-          if (mode=='r')fprintf ( out, ">%s\n%s%s", S->name[s], S->seq[s], LINE_SEPARATOR);\r
-          continue;\r
-        }\r
-      if (!T->template_file || !T->template_file[0])continue;\r
-\r
-      /* byte-for-byte copy of the template file, then a separator */\r
-      in=vfopen (T->template_file, "r");\r
-      while ( (ch=fgetc (in))!=EOF)fprintf ( out, "%c", ch);\r
-      fprintf (out, "%s", LINE_SEPARATOR);\r
-      vfclose (in);\r
-    }\r
-\r
-  fprintf (out, "TARGET_SEQ_NAME: ");\r
-  for (k=2; k<n; k++)fprintf ( out, "%s ", (S->name[slist[k]]));\r
-  fprintf ( out, "%s", LINE_SEPARATOR);\r
-\r
-  vfclose (out);\r
-  vfree (slist);\r
-  return 1;\r
-}\r
-\r
-/* Writes sequences of S to file as FASTA records whose header line is\r
- * ">decode_name(name) name".\r
- *   list : NULL to write every sequence; otherwise a SEPARATORS-delimited\r
- *          string holding a count n followed by n sequence indexes\r
- * Returns 0 when S is NULL, 1 otherwise.\r
- * The list is tokenised through a private copy kept in a static buffer, so\r
- * the string of the caller is not modified by strtok. Writing stops early\r
- * when the list holds fewer indexes than announced.\r
- */\r
-int seq_list2fasta_file( Sequence *S, char *list, char *file)\r
- {\r
-   FILE *fp;\r
-   int n, a, s;\r
-   static char *buf;\r
-   static int blen;\r
-   int l;\r
-   char *tok;\r
-\r
-   if ( !S)return 0;\r
-\r
-   fp=vfopen ( file, "w");\r
-   if ( !list)\r
-     {\r
-       for ( a=0; a<S->nseq; a++)\r
-         {\r
-           fprintf ( fp, ">%s %s\n%s\n", decode_name (S->name[a], CODE),S->name[a], S->seq[a]);\r
-         }\r
-     }\r
-   else\r
-     {\r
-       l=strlen (list);\r
-       if ( !buf || l>blen)\r
-         {\r
-           if (buf)vfree(buf);\r
-           buf=vcalloc ( l+1, sizeof (char));\r
-           blen=l;\r
-         }\r
-       /* refresh the copy on every call, not only when the buffer grows */\r
-       sprintf ( buf, "%s", list);\r
-\r
-       tok=strtok (buf,SEPARATORS);\r
-       n=(tok)?atoi(tok):0;\r
-       for ( a=0; a< n; a++)\r
-         {\r
-           tok=strtok (NULL, SEPARATORS);\r
-           if (!tok)break;\r
-           s=atoi(tok);\r
-           /* both names of the header refer to sequence s */\r
-           fprintf ( fp, ">%s %s\n%s\n", decode_name (S->name[s], CODE), S->name[s],S->seq[s]);\r
-         }\r
-     }\r
-   vfclose (fp);\r
-   return 1;\r
- }\r
-/* Stores every residue S->seq[s][p] in the last field (n_fields-1) of\r
- * ST->struc[s][p+1], for all the sequences of S, and returns ST.\r
- */\r
-Structure * seq2struc ( Sequence *S, Structure *ST)\r
- {\r
-   int s, p;\r
-\r
-   for (s=0; s< S->nseq; s++)\r
-     {\r
-       p=0;\r
-       while (p< S->len[s])\r
-         {\r
-           ST->struc[s][p+1][ST->n_fields-1]=S->seq[s][p];\r
-           p++;\r
-         }\r
-     }\r
-   return ST;\r
- }\r
-\r
-/* For every row of A, stores its residues that are not gaps, in order, in\r
- * the last field (n_fields-1) of ST->struc[row][0], [1], ...\r
- */\r
-void aln2struc (Alignment *A, Structure *ST) \r
- {\r
-   int row, col, filled;\r
-\r
-   for (row=0; row< A->nseq; row++)\r
-     {\r
-       filled=0;\r
-       for (col=0; col< A->len_aln; col++)\r
-         {\r
-           if ( is_gap (A->seq_al[row][col]))continue;\r
-           ST->struc[row][filled][ST->n_fields-1]=A->seq_al[row][col];\r
-           filled++;\r
-         }\r
-     }\r
- }\r
-/* Appends the sequences of B below those of A and returns A. When one of\r
- * the two alignments is NULL the other one is returned untouched.\r
- * A is resized to hold A->nseq+B->nseq rows of the longest sequence string\r
- * found in either alignment; comments, sequence, name, file, order,\r
- * score_seq and len are copied for each row of B. len_aln becomes the\r
- * larger of the two, while nseq, score_aln and finished are summed.\r
- */\r
-Alignment *stack_aln (Alignment *A, Alignment *B)\r
- {\r
-   int src, dst;\r
-   int longest=0;\r
-   int total_nseq=0;\r
-\r
-   if ( !B)return A;\r
-   if ( !A)return B;\r
-\r
-   total_nseq=A->nseq+B->nseq;\r
-   for (src=0; src< A->nseq; src++)longest=MAX(strlen(A->seq_al[src]),longest);\r
-   for (src=0; src< B->nseq; src++)longest=MAX(strlen(B->seq_al[src]),longest);\r
-\r
-   A=realloc_aln2 ( A,total_nseq,longest+1);\r
-\r
-   dst=A->nseq;\r
-   src=0;\r
-   while (src< B->nseq)\r
-     {\r
-       sprintf ( A->seq_comment[dst], "%s", B->seq_comment[src]);\r
-       sprintf ( A->aln_comment[dst], "%s", B->aln_comment[src]);\r
-       sprintf ( A->seq_al[dst], "%s", B->seq_al[src]);\r
-       sprintf ( A->name[dst], "%s", B->name[src]);\r
-       sprintf ( A->file[dst], "%s", B->file[src]);\r
-\r
-       A->order[dst][0]=B->order[src][0];\r
-       A->order[dst][1]=B->order[src][1];\r
-       A->score_seq[dst]=B->score_seq[src];\r
-       A->len[dst]=B->len[src];\r
-\r
-       src++;\r
-       dst++;\r
-     }\r
-\r
-   A->len_aln=MAX(A->len_aln, B->len_aln);\r
-   A->nseq+=B->nseq;\r
-   A->score_aln+=B->score_aln;\r
-   A->finished+=B->finished;\r
-   return A;\r
- }\r
- \r
-/* Puts in row seqIaln of A the fragment returned by\r
- * extract_char (S->seq[seq_n], start, len), under the given name.\r
- * A is created or enlarged by realloc_aln2 so that it holds row seqIaln and\r
- * the fragment (A may be NULL on entry). order[seqIaln] records seq_n and\r
- * start, nseq and len_aln are refreshed, and A->S is set to S.\r
- * Returns the possibly reallocated alignment.\r
- */\r
-Alignment *chseqIaln(char *name, int seq_n, int start,int len,Sequence *S, int seqIaln, Alignment *A)\r
- {\r
-   char *fragment;\r
-   int rows, cols;\r
-\r
-   fragment=extract_char ( S->seq[seq_n], start, len);\r
-\r
-   if (A==NULL)\r
-     {\r
-       rows=seqIaln+1;\r
-       cols=strlen (fragment);\r
-     }\r
-   else\r
-     {\r
-       rows=MAX(A->nseq,seqIaln+1);\r
-       cols=MAX(strlen (fragment),A->len_aln);\r
-     }\r
-   A=realloc_aln2 (A, rows, cols+1);\r
-\r
-   sprintf ( A->seq_al[seqIaln], "%s",fragment);\r
-   sprintf ( A->name[seqIaln], "%s", name);\r
-   A->order[seqIaln][0]=seq_n;\r
-   A->order[seqIaln][1]=start;\r
-\r
-   A->nseq=MAX(A->nseq, seqIaln+1);\r
-   A->len_aln=return_maxlen(A->seq_al, A->nseq);\r
-   A->S=S;\r
-\r
-   vfree (fragment);\r
-   return A;\r
- }\r
-\r
-/* Replaces every gap of A by a symbol drawn with rand () from an alphabet\r
- * chosen after (A->S)->type: "acefghiklmnpqrstuvwy" for PROTEIN, "agct" for\r
- * DNA and RNA. For any other type no alphabet is available and A is\r
- * returned unchanged. Returns A.\r
- * NOTE(review): the protein alphabet holds 'u' and no 'd' - confirm that\r
- * this is intended.\r
- */\r
-Alignment * aln_gap2random_aa(Alignment *A)\r
- {\r
-   int a, b,l;\r
-   char alp[200];\r
-\r
-   alp[0]='\0'; /* stays empty when the type is not recognised */\r
-   if (strm ( (A->S)->type, "PROTEIN"))\r
-     sprintf ( alp, "acefghiklmnpqrstuvwy");\r
-   else if ( strm ( (A->S)->type, "DNA") ||strm ( (A->S)->type, "RNA") )\r
-     sprintf ( alp, "agct");\r
-   l=strlen (alp);\r
-\r
-   /* nothing to draw from: also avoids rand()%0 */\r
-   if (l==0)return A;\r
-\r
-   for (a=0; a<A->nseq; a++)\r
-     for ( b=0; b<A->len_aln; b++)\r
-       if ( is_gap (A->seq_al[a][b]))A->seq_al[a][b]=alp[(int)rand()%(l)];\r
-   return A;\r
- }\r
-\r
-/* Turns A into an alignment of nseq random sequences of len symbols drawn\r
- * from alphabet: A is resized by realloc_aln2 (nseq rows, len+1 columns),\r
- * the file label of each of the nseq rows is set to "random alignment", and\r
- * the rows are then added one by one by add_random_sequence2aln.\r
- * Returns the possibly reallocated alignment.\r
- */\r
-Alignment * make_random_aln(Alignment *A,int nseq, int len, char *alphabet)\r
- {\r
-   int a;\r
-\r
-   A=realloc_aln2(A, nseq, len+1);\r
-\r
-   /* label the rows before nseq is reset: looping up to A->nseq after the\r
-      reset would never run */\r
-   for ( a=0; a< nseq; a++)sprintf ( A->file[a], "random alignment");\r
-\r
-   A->nseq=0;\r
-   A->len_aln=len;\r
-   for ( a=0; a< nseq; a++)\r
-     A=add_random_sequence2aln(A,alphabet);\r
-   return A;\r
- }\r
-/* Appends to A one sequence of A->len_aln symbols drawn with rand () from\r
- * alphabet. When the name slot of the new row is empty, a name of 10 random\r
- * symbols of alphabet is generated. A->nseq is incremented.\r
- * With an empty alphabet nothing can be drawn and A is returned unchanged.\r
- * Returns the possibly reallocated alignment.\r
- */\r
-Alignment * add_random_sequence2aln( Alignment *A, char *alphabet)\r
- {\r
-   int a, n;\r
-\r
-   vsrand(0);\r
-\r
-   n=strlen(alphabet);\r
-   if (n==0)return A; /* avoids rand()%0 */\r
-\r
-   A=realloc_alignment2 (A, A->nseq+1, A->len_aln+1);\r
-\r
-   for ( a=0; a< A->len_aln; a++)A->seq_al[A->nseq][a]=alphabet[rand()%n];\r
-   /* the row was sized for len_aln+1 characters: terminate it explicitly */\r
-   A->seq_al[A->nseq][A->len_aln]='\0';\r
-\r
-   if (! A->name[A->nseq][0])\r
-     {\r
-       for ( a=0; a<10; a++)A->name[A->nseq][a]=alphabet[rand()%n];\r
-       A->name[A->nseq][a]='\0';\r
-     }\r
-\r
-   A->nseq++;\r
-   return A;\r
- }\r
-\r
-/* Returns a copy of A->S in which every residue is first replaced by\r
- * UNDEFINED_RESIDUE; the residues designated by A->seq_cache for the rows of\r
- * A are then restored from A->S. Returns NULL when A or A->S is NULL.\r
- * For a row mapped to sequence s=A->order[row][0], the cache slots 1..l are\r
- * visited, l being the number of characters left in the row by ungap ().\r
- * NOTE(review): a cache value of 0 passes the r>=0 test and addresses\r
- * index -1 - confirm that 0 cannot occur.\r
- */\r
-Sequence *get_defined_residues( Alignment *A)\r
- {\r
-   Sequence *D;\r
-   char *ungapped;\r
-   int row, k, s, nres, r;\r
-\r
-   if ( !A)return NULL;\r
-   if ( !A->S)return NULL;\r
-\r
-   D=duplicate_sequence (A->S);\r
-   for (row=0; row< D->nseq; row++)\r
-     {\r
-       k=0;\r
-       while (k< D->len[row])D->seq[row][k++]=UNDEFINED_RESIDUE;\r
-     }\r
-\r
-   ungapped=vcalloc(A->len_aln+1,sizeof (char));\r
-   for (row=0; row< A->nseq; row++)\r
-     {\r
-       sprintf ( ungapped, "%s",A->seq_al[row]);\r
-       ungap(ungapped);\r
-       nres=strlen (ungapped);\r
-       s=A->order[row][0];\r
-\r
-       for (k=1; k<= nres; k++)\r
-         {\r
-           r=A->seq_cache[s][k];\r
-           if ( r<0)continue;\r
-           D->seq[s][r-1]=(A->S)->seq[s][r-1];\r
-         }\r
-     }\r
-   vfree(ungapped);\r
-   return D;\r
- }\r
-Alignment *thread_defined_residues_on_aln ( Alignment *A, Sequence *S1)\r
- {\r
- int a, b;\r
- int gap, r,s, r2;\r
- for ( a=0; a< A->nseq; a++)\r
- {\r
- s=A->order[a][0];\r
- r=A->order[a][1];\r
- for (b=0;b< A->len_aln; b++)\r
- {\r
- gap=is_gap(A->seq_al[a][b]);\r
- \r
- if (!gap)\r
- {\r
- r+=!gap;\r
- r2=A->seq_cache[s][r]-1;\r
- \r
- if (r2>=0 && S1->seq[s][r2]==UNDEFINED_RESIDUE)\r
- A->seq_al[a][b]=UNDEFINED_RESIDUE;\r
- }\r
- }\r
- }\r
- return A;\r
- }\r
-\r
-/* For each of the nseq pairs (seq1[a], seq2[a]): both strings are copied\r
- * and ungapped, the copies are handed to str_overlap (copy1, copy2, '*'),\r
- * and when it returns a value other than 0 the characters of the copies are\r
- * written back, in order, over the positions of the originals that are not\r
- * gaps. Always returns NULL.\r
- * NOTE(review): this presumes that str_overlap edits the two copies in\r
- * place - confirm against its definition.\r
- */\r
-int ** trim_aln_borders (char **seq1, char **seq2, int nseq)\r
- {\r
-   char *copy1, *copy2;\r
-   int pair, k, src;\r
-   int len1, len2;\r
-   int size;\r
-\r
-   size=MAX(get_longest_string (seq1,-1, NULL, NULL),get_longest_string (seq2,-1, NULL, NULL))+1;\r
-   copy1=vcalloc ( size, sizeof(char));\r
-   copy2=vcalloc ( size, sizeof(char));\r
-\r
-   for (pair=0; pair< nseq; pair++)\r
-     {\r
-       sprintf ( copy1, "%s", seq1[pair]);\r
-       sprintf ( copy2, "%s", seq2[pair]);\r
-       ungap (copy1);\r
-       ungap (copy2);\r
-\r
-       if (str_overlap ( copy1, copy2,'*')==0)continue;\r
-\r
-       len1=strlen ( seq1[pair]);\r
-       len2=strlen ( seq2[pair]);\r
-\r
-       src=0;\r
-       for (k=0; k< len1; k++)\r
-         if ( !is_gap(seq1[pair][k]))seq1[pair][k]=copy1[src++];\r
-       seq1[pair][k]='\0';\r
-\r
-       src=0;\r
-       for (k=0; k< len2; k++)\r
-         if ( !is_gap(seq2[pair][k]))seq2[pair][k]=copy2[src++];\r
-       seq2[pair][k]='\0';\r
-     }\r
-   vfree (copy1);\r
-   vfree (copy2);\r
-   return NULL;\r
- }\r
-/* Adds every sequence of IN to OUT through add_sequence and returns OUT.\r
- *   OUT==NULL : a duplicate of IN is returned instead\r
- *   IN==NULL  : there is nothing to add and OUT is returned unchanged\r
- * A name appearing twice in IN is reported on stderr and ends the process\r
- * through myexit (EXIT_FAILURE). Returns NULL as soon as add_sequence\r
- * returns NULL.\r
- */\r
-Sequence * merge_seq ( Sequence *IN, Sequence *OUT)\r
- {\r
-   int a;\r
-   char *dup;\r
-\r
-   if ( OUT==NULL)return duplicate_sequence (IN);\r
-   if ( IN==NULL)return OUT;\r
-\r
-   dup=check_list_for_dup( IN->name, IN->nseq);\r
-   if ( dup)\r
-     {\r
-       fprintf ( stderr, "\nERROR: %s is duplicated in file %s[FATAL]\n", dup, IN->file[0]);\r
-       myexit (EXIT_FAILURE);\r
-     }\r
-   for ( a=0; a< IN->nseq; a++)\r
-     if ((OUT=add_sequence ( IN, OUT, a))==NULL)return NULL;\r
-   return OUT;\r
- }\r
-\r
-/* For every sequence of S whose name is absent from NA, appends " name" to\r
- * the seq_comment of the sequence of NA with the lowest diff value against\r
- * it (the first sequence of NA when none is below 100). Returns NA.\r
- *   diff : matrix indexed by the positions of the sequences in S\r
- * Sequences of NA whose name cannot be found in S have no row in diff and\r
- * are skipped.\r
- */\r
-Alignment *seq_name2removed_seq_name(Sequence *S, Alignment *NA, float **diff)\r
-{\r
-  int a, b, rb, s;\r
-  float min_diff;\r
-\r
-  for (a=0; a< S->nseq; a++)\r
-    {\r
-      if (name_is_in_list( S->name[a], NA->name, NA->nseq, 100)!=-1) continue;\r
-      for ( min_diff=100, s=0, b=0; b< NA->nseq; b++)\r
-        {\r
-          rb=name_is_in_list ( NA->name[b], S->name, S->nseq, 100);\r
-          if ( rb==-1)continue; /* would otherwise read diff[a][-1] */\r
-          if ( diff[a][rb]<min_diff)\r
-            {\r
-              s=b;\r
-              min_diff=diff[a][rb];\r
-            }\r
-        }\r
-      strcat ( NA->seq_comment[s], " ");\r
-      strcat ( NA->seq_comment[s], S->name[a]);\r
-    }\r
-  return NA;\r
-}\r
- \r
- \r
- \r
- \r
-/* Returns what name_is_in_list gives for name among the S->nseq names of S\r
- * (compared with a length limit of MAXNAMES+1), or -1 when S is NULL.\r
- */\r
-int seq_name2index (char *name, Sequence *S)\r
-{\r
-  return (S)?name_is_in_list ( name, S->name, S->nseq, MAXNAMES+1):-1;\r
-}\r
-/* Splits in place a string of the form name<sep>start<sep>end.\r
- *   s     : modified: the first and second separators found are replaced\r
- *           by '\0', so that s reads as the bare name afterwards\r
- *   start : receives atoi () of the second field, 0 when absent or empty\r
- *   end   : receives atoi () of the third field, 0 when absent or empty\r
- * Returns s. Fields longer than 99 characters are truncated to 99 before\r
- * the conversion rather than overflowing the local buffers.\r
- */\r
-char * seq_name2coor ( char *s, int *start, int *end, char sep)\r
-{\r
-  /*name|start|end */\r
-  char n1[100], n2[100];\r
-  int a=0, b=0, c=0;\r
-\r
-  n1[0]=n2[0]='\0';\r
-  start[0]=end[0]=0;\r
-\r
-  /* name */\r
-  while ( s[a]!=sep && s[a]!='\0')a++;\r
-  if ( s[a]=='\0')return s;\r
-  s[a++]='\0';\r
-\r
-  /* start */\r
-  while ( s[a]!=sep && s[a]!='\0')\r
-    {\r
-      if ( b<(int)sizeof (n1)-1)n1[b++]=s[a];\r
-      a++;\r
-    }\r
-  n1[b]='\0';\r
-  if ( n1[0])start[0]=atoi(n1);\r
-  if ( s[a]=='\0')return s;\r
-  s[a++]='\0';\r
-\r
-  /* end */\r
-  while ( s[a]!=sep && s[a]!='\0')\r
-    {\r
-      if ( c<(int)sizeof (n2)-1)n2[c++]=s[a];\r
-      a++;\r
-    }\r
-  n2[c]='\0';\r
-  if ( n2[0])end[0]=atoi(n2);\r
-\r
-  return s;\r
-}\r
- \r
-/* Extracts one sequence, or a segment of it, from the alignment S and\r
- * returns it as a Sequence read back from a temporary FASTA file.\r
- *   n         : "#..." for the last sequence, else a sequence name, else a\r
- *               1-based sequence number\r
- *   start,end : 1-based inclusive segment; 0/0 keeps the whole sequence and\r
- *               end==0 keeps everything from start onwards\r
- *   keep_name : when 0 and start is set, the record is named\r
- *               name_start_end (the string length stands for end when end\r
- *               is 0); otherwise the original name is kept\r
- * An unknown or out-of-range sequence is reported on stderr and ends the\r
- * process through myexit (EXIT_FAILURE). The segment never starts before\r
- * the first character and stops at the end of the string.\r
- */\r
-Sequence *extract_one_seq(char *n,int start, int end, Alignment *S, int keep_name)\r
- {\r
-   int seq, a, first;\r
-   FILE*fp;\r
-   char *name;\r
-   Sequence *OUT_S;\r
-\r
-   if ( n[0]=='#')seq=S->nseq;\r
-   else\r
-     {\r
-       seq=name_is_in_list (n, S->name, S->nseq, 100)+1;\r
-       if ( seq==0 && is_number (n))seq=atoi(n);\r
-     }\r
-\r
-   /* seq is 1-based here: 0 means not found, above nseq means no such row */\r
-   if ( seq<1 || seq>S->nseq)\r
-     {\r
-       fprintf ( stderr, "\nCould not find Sequence %s [FATAL]", n);\r
-       myexit (EXIT_FAILURE);\r
-     }\r
-   seq--;\r
-\r
-   name=vtmpnam ( NULL);\r
-   fp=vfopen ( name, "w");\r
-   if ( start && end &&!keep_name)fprintf (fp, ">%s_%d_%d\n",S->name[seq],start, end);\r
-   else if ( start && end==0 && !keep_name)fprintf (fp, ">%s_%d_%d\n",S->name[seq],start,(int)strlen ( S->seq_al[seq]));\r
-   else fprintf (fp, ">%s\n", S->name[seq]);\r
-\r
-   if ( start==0 && end==0){fprintf (fp, "%s\n", S->seq_al[seq]);}\r
-   else if (end==0){fprintf (fp, "%s\n", S->seq_al[seq]+start-1);}\r
-   else\r
-     {\r
-       /* start==0 with an end set would otherwise begin at index -1 */\r
-       first=(start>0)?start-1:0;\r
-       for ( a=first; a<end && S->seq_al[seq][a]!='\0'; a++){fprintf ( fp, "%c", S->seq_al[seq][a]);}\r
-       fprintf ( fp, "\n");\r
-     }\r
-\r
-   vfclose (fp);\r
-   OUT_S=get_fasta_sequence_num (name, NULL);\r
-\r
-   return OUT_S;\r
- }\r
- \r
-\r
- \r
-Sequence * extract_sub_seq( Sequence *COOR, Sequence *S)\r
- {\r
- int a, b, c,s;\r
- int start, end;\r
- \r
- for ( a=0; a< S->nseq; a++)\r
- {\r
- if ( (s=name_is_in_list ( S->name[a], COOR->name, COOR->nseq, 100))!=-1)\r
- {\r
- \r
- sscanf ( COOR->seq_comment[s], "%d %d", &start, &end);\r
- for (c=0,b=start-1; b< end; b++, c++)S->seq[a][c]=S->seq[a][b];\r
- S->seq[a][c]='\0';\r
- sprintf ( S->seq_comment[a], "%s",COOR->seq_comment[s]);\r
- \r
- }\r
- }\r
- S=reorder_seq ( S, COOR->name, COOR->nseq);\r
- return S;\r
- }\r
- \r
-\r
-\r
-char * aln_column2string (Alignment *A, int p)\r
- {\r
- char *s;\r
- int a;\r
- if (p>=A->len_aln)\r
- {\r
- HERE ("ERROR: index (p=%d) loger than aln (l=%d) [FATAL]", p, A->len_aln);\r
- exit (EXIT_FAILURE);\r
- }\r
- else\r
- {\r
- s=vcalloc (A->nseq+1, sizeof (char));\r
- for (a=0; a< A->nseq; a++)s[a]=A->seq_al[a][p];\r
- }\r
- return s;\r
- }\r
-Alignment * fix_aln_seq ( Alignment *A, Sequence *S)\r
- {\r
- int a, b, c;\r
- char *buf1, *buf2;\r
- int g0, g1, nr0, nr1;\r
- int id, tot;\r
- Alignment *B;\r
-\r
-\r
- /*This function establishes the correspondance between every (1..N+1) residue of each aligned sequence\r
- and its correspondance in S:\r
- A->seq_cache[a][b]=x means that residue b of aligned sequence a corresponds to residue x of the sequence with tye same index in S\r
- A->seq_cache[a][b]=0 means there is no correspondance.\r
- a is the index of the sequence\r
- Applying this function is needed for turning an alignment into a constraint list\r
- */\r
- \r
-\r
- if ( S==NULL)return A;\r
- \r
- A->seq_cache=declare_int ( S->nseq, MAX((A->len_aln+1), S->max_len+1));\r
- \r
- for (a=0; a< S->nseq; a++)\r
- for ( b=0; b< A->len_aln; b++)A->seq_cache[a][b]=-1;\r
- \r
- buf1=buf2=NULL;\r
- for ( a=0; a< S->nseq; a++)\r
- {\r
- for (b=0; b< A->nseq; b++) \r
- {\r
- if (strm ( S->name[a], A->name[b]))\r
- {\r
- A->order[b][0]=a;\r
- \r
- vfree (buf1);\r
- buf1=vcalloc ( A->len_aln+1, sizeof (char));\r
- sprintf (buf1, "%s", A->seq_al[b]);\r
- ungap (buf1);\r
- upper_string (buf1);\r
- \r
- vfree(buf2);\r
- buf2=vcalloc (strlen(S->seq[a])+1, sizeof (char));\r
- sprintf (buf2, "%s",S->seq[a]);\r
- ungap (buf2);\r
- upper_string (buf2);\r
- \r
- \r
-\r
- if ( strm (buf1,buf2))\r
- {\r
- \r
- for ( c=0; c<S->len[a]; c++)A->seq_cache[a][c+1]=c+1;\r
- }\r
- else\r
- {\r
- \r
- B=align_two_sequences (buf2,buf1,"blosum62mt",-4,-1, "myers_miller_pair_wise");\r
- if ( getenv ("DEBUG_RECONCILIATION"))\r
- {\r
- fprintf (stderr, "\n[DEBUG_RECONCILIATION:fix_aln_seq]\nReconciliation of %s\nA=Ref_sequence\nB=New_seq", S->name[a]);\r
- print_aln (B);\r
- }\r
- \r
- for (id=0, tot=0,nr0=0,nr1=0,c=0; c<B->len_aln; c++)\r
- {\r
- g0=is_gap(B->seq_al[0][c]);\r
- g1=is_gap(B->seq_al[1][c]);\r
- nr0+=1-g0;\r
- nr1+=1-g1;\r
- if ( !g0 && !g1)\r
- {\r
- tot++;\r
- id+=(B->seq_al[0][c]==B->seq_al[1][c])?1:0;\r
- A->seq_cache[a][nr1]=nr0;\r
- }\r
- else if (g0 && !g1)\r
- {\r
- A->seq_cache[a][nr1]=0;\r
- }\r
- }\r
- if ( ((id*100)/tot)<20)\r
- {\r
- print_aln (B);\r
- fprintf ( stderr, "\nTwo different sequences have the same name: %s", S->name[a]); \r
- fprintf ( stderr, "\nIf %s is a PDBID, Make sure it identifies the right chain (A, B, 1, 2...)", S->name[a]);\r
- fprintf ( stderr, "\nChain number or index must be added to the PDB id (i.e. 1gowA)");\r
- fprintf ( stderr, "\nIf You want to use %s anyway, rename it with a non-PDB identifier such as seq_%s\n",S->name[a],S->name[a]); \r
- myexit (EXIT_FAILURE);\r
- }\r
-\r
- free_sequence ( B->S, -1);\r
- free_aln (B);\r
- }\r
- \r
- }\r
- }\r
- }\r
- vfree(buf1);vfree(buf2);\r
- return A;\r
- }\r
-\r
-Sequence * add_prf2seq ( char *file, Sequence *S)\r
- {\r
- char **new_seq;\r
- Sequence *NS;\r
- \r
- if ( !is_aln (file)&& !is_seq (file))return S;\r
- else\r
- {\r
- X_template *R;\r
- Alignment *A;\r
- \r
-\r
- R=fill_R_template(file,file, S);\r
- \r
- A=(R->VR)->A;\r
- ((R->VR)->A)->expand=1;\r
- new_seq=declare_char (1,A->len_aln+1);\r
- sprintf ( new_seq[0], "%s",aln2cons_seq_mat(A, "blosum62mt"));\r
- \r
- NS=fill_sequence_struc(1, new_seq,A->file);\r
- S=add_sequence (NS, S, 0);\r
- (S->T[S->nseq-1])->R=R;\r
- \r
- free_sequence (NS, NS->nseq);\r
- free_char( new_seq, -1);\r
- \r
- return S;\r
- }\r
- }\r
-int prf_in_seq ( Sequence *S)\r
-{\r
- int a;\r
- \r
- if ( !S) return 0;\r
- else \r
- {\r
- for ( a=0; a< S->nseq; a++)\r
- if (seq2R_template_profile(S, a)) return 1;\r
- }\r
- return 0;\r
-}\r
-Sequence * add_sequence ( Sequence *IN, Sequence *OUT, int i)\r
- {\r
- int s, a;\r
- \r
- char *buf;\r
- if (OUT==NULL)\r
- {\r
- \r
- OUT=duplicate_sequence (IN);\r
- return OUT;\r
- }\r
- for (a=0; a<OUT->nseq; a++)\r
- {\r
- Alignment *P;\r
- P=seq2R_template_profile (OUT, a);\r
- if (!P) continue;\r
- else if (name_is_in_list (IN->name[i], P->name, P->nseq, 100)!=-1) return OUT;\r
- }\r
- \r
- /*Adds sequence i of IN at the end of OUT*/\r
- \r
- if ((s=name_is_in_list ( IN->name[i], OUT->name, OUT->nseq,STRING))==-1 )\r
- {\r
- OUT=realloc_sequence (OUT, OUT->nseq+1, IN->len[i]); \r
- sprintf ( OUT->name[OUT->nseq],"%s",IN->name[i]);\r
- sprintf ( OUT->file[OUT->nseq],"%s",IN->file[i]);\r
- sprintf ( OUT->seq_comment[OUT->nseq],"%s",IN->seq_comment[i]);\r
- sprintf ( OUT->aln_comment[OUT->nseq],"%s",IN->aln_comment[i]);\r
- \r
- sprintf ( OUT->seq[OUT->nseq],"%s",IN->seq[i]);\r
- OUT->len[OUT->nseq]=IN->len[i];\r
- OUT->T[OUT->nseq][0]=IN->T[i][0];\r
- OUT->nseq++;\r
- return OUT;\r
- }\r
- else if ( s!=-1 && !case_insensitive_strcmp ( IN->seq[i], OUT->seq[s]))\r
- {\r
-\r
- if ( getenv4debug("DEBUG_RECONCILIATION"))fprintf ( stderr,"[DEBUG_RECONCILIATION:add_sequence]\n%s\n%s\n", IN->seq[i], OUT->seq[s]);\r
- \r
- add_warning (stderr, "WARNING: DISCREPANCY:%s in [%s] and [%s]\n", IN->name[i], IN->file[i], OUT->file[s]);\r
- \r
- \r
- if (((buf=build_consensus(IN->seq[i], OUT->seq[s],"cfasta_pair_wise" ))!=NULL)||((buf=build_consensus(IN->seq[i], OUT->seq[s],"myers_miller_pair_wise" ))!=NULL))\r
- {\r
- \r
- OUT->max_len=MAX(OUT->max_len, strlen(buf));\r
- OUT->min_len=MIN(OUT->min_len, strlen(buf));\r
- OUT->seq =realloc_char ( OUT->seq, -1, -1,OUT->nseq,OUT->max_len+1);\r
- \r
- sprintf ( OUT->seq[s],"%s",buf);\r
- OUT->len[s]=strlen (buf);\r
- vfree (buf);\r
- return OUT;\r
- }\r
- else\r
- {\r
- fprintf ( stderr, "IMPOSSIBLE TO RECONCILIATE SOME SEQUENCES[FATAL:%s]\n", PROGRAM);\r
- print_aln ( align_two_sequences (IN->seq[i], OUT->seq[s], "idmat", 0, 0, "fasta_pair_wise"));\r
- myexit (EXIT_FAILURE);\r
- return NULL;\r
- }\r
- \r
- }\r
- else\r
- {\r
- return OUT;\r
- }\r
- }\r
- \r
-\r
-Sequence * trim_seq ( Sequence *A, Sequence *B)\r
- {\r
- int a;\r
- Sequence *R;\r
- \r
- if (A->nseq>B->nseq)\r
- {\r
- Sequence *I;\r
- I=A;A=B;B=I;\r
- }\r
-\r
- R=declare_sequence (MIN(A->min_len,B->min_len), MAX(A->max_len, B->max_len), MIN(A->nseq, B->nseq));\r
- R->nseq=0;\r
- \r
- for (a=0; a< A->nseq; a++)\r
- { \r
- if ( name_is_in_list ( A->name[a], B->name, B->nseq,STRING+1)!=-1)\r
- {\r
- sprintf ( R->name[R->nseq], "%s", A->name[a]);\r
- sprintf ( R->seq[R->nseq], "%s", A->seq[a]);\r
- sprintf ( R->file[R->nseq], "%s", A->file[a]);\r
- sprintf ( R->aln_comment[R->nseq], "%s", A->aln_comment[a]);\r
- sprintf ( R->seq_comment[R->nseq], "%s", A->seq_comment[a]);\r
- \r
- R->len[R->nseq]=A->len[a];\r
- R->nseq++;\r
- }\r
- }\r
- return R;\r
- }\r
-\r
-Sequence * trim_aln_seq ( Alignment *A, Alignment *B)\r
- {\r
- int a;\r
- static char **name_list;\r
- int n=0;\r
- Sequence *SA, *SB;\r
- int **cache_A=NULL;\r
- int **cache_B=NULL;\r
- int * p;\r
-\r
- /*This function inputs two alignments A and B\r
- It removes sequences that are not common to both of them\r
- It rearange the sequences so that they are in the same order\r
- A decides on the order\r
- The Sequences (A->S) and (B->S) are treated the same way\r
- Sequences are also merged in order to detects discrepencies.\r
- A pointer to S is returned\r
- */\r
- if (name_list)free_char (name_list, -1);\r
- name_list=declare_char (MAX(A->nseq, B->nseq), STRING+1);\r
- \r
- for ( a=0; a< A->nseq; a++)\r
- { \r
- if ( name_is_in_list ( A->name[a], B->name, B->nseq,STRING)!=-1)\r
- {\r
- sprintf ( name_list[n++], "%s", A->name[a]);\r
- }\r
- }\r
- \r
- \r
- \r
- reorder_aln ( A, name_list, n);\r
- if (A->seq_cache)cache_A=duplicate_int (A->seq_cache, -1, -1);\r
- if (B->seq_cache)cache_B=duplicate_int (B->seq_cache, -1, -1);\r
- reorder_aln ( B, name_list, n);\r
- for ( a=0; a< n; a++)\r
- {\r
- if ( cache_A)\r
- {\r
- p=A->seq_cache[A->order[a][0]];\r
- A->seq_cache[A->order[a][0]]=cache_A[a];\r
- cache_A[a]=p;\r
- }\r
- if ( cache_B)\r
- {\r
- p=B->seq_cache[B->order[a][0]];\r
- B->seq_cache[B->order[a][0]]=cache_B[a];\r
- cache_B[a]=p;\r
- } \r
- A->order[a][0]=B->order[a][0]=a;\r
- }\r
- free_int(A->seq_cache, -1);\r
- free_int(B->seq_cache, -1);\r
-\r
- A->seq_cache=cache_A;\r
- B->seq_cache=cache_B;\r
- \r
-\r
- \r
- SA=aln2seq(A);\r
- SB=aln2seq(B);\r
- \r
- A->S=B->S=merge_seq (SA, SB);\r
- return A->S;\r
- }\r
-Sequence * trim_aln_seq_name ( Alignment *A, Alignment *B)\r
- {\r
- int a;\r
- Sequence *S;\r
- \r
- /*This function inputs two alignments A and B\r
- It removes sequences that are not common to both of them\r
- It rearange the sequences so that they are in the same order\r
- A decides on the order\r
- */\r
- S=declare_sequence ( 1, 1, A->nseq+B->nseq);\r
- S->nseq=0;\r
- for ( a=0; a< A->nseq; a++)\r
- { \r
- if ( name_is_in_list ( A->name[a], B->name, B->nseq,STRING)!=-1)\r
- {\r
- sprintf ( S->name[S->nseq++], "%s", A->name[a]);\r
- }\r
- }\r
- return S;\r
- }\r
-\r
-\r
-\r
/*
 * Strips a template tag from sequence names and returns the matching template
 * list lines.
 *
 *   name : array of nseq names; a name holding a tag is truncated in place at
 *          the start of the tag.
 *   tag  : tag to look for, or NULL to look for "_S_" then "_G_".
 *
 * Returns an array of nseq strings (allocated with declare_char, 100 bytes
 * each). Entry a is ">name _X_ rest" when name[a] held a tag (X being the
 * character following the first character of the tag, rest the text starting
 * three characters after the tag start) and is left empty otherwise. Lines
 * that do not fit are truncated. Returns NULL when name is NULL.
 */
char ** rm_name_tag (char **name, int nseq, char *tag)
{
    enum { TEMPLATE_WIDTH = 100, DEFAULT_TAG_WIDTH = 4 };
    int a, b, ntag;
    int tag_width = DEFAULT_TAG_WIDTH;
    char **tag_list;
    char *s;
    char **template_list;

    if (!name) return NULL;

    /* Make room for caller-supplied tags longer than "_X_". */
    if (tag && (int)strlen(tag) + 1 > tag_width) tag_width = (int)strlen(tag) + 1;
    tag_list = declare_char(10, tag_width);

    if (tag) {
        ntag = 1;
        sprintf(tag_list[0], "%s", tag);
    } else {
        ntag = 0;
        sprintf(tag_list[ntag++], "_S_");
        sprintf(tag_list[ntag++], "_G_");
    }

    template_list = declare_char(nseq, TEMPLATE_WIDTH);
    for (a = 0; a < nseq; a++) {
        for (b = 0; b < ntag; b++) {
            s = strstr(name[a], tag_list[b]);
            /* The split below touches s[2] and reads from s+3: make sure
               these positions are inside the name. */
            if (!s || s[1] == '\0' || s[2] == '\0') continue;

            s[0] = '\0';
            s[2] = '\0';
            snprintf(template_list[a], TEMPLATE_WIDTH, ">%s _%s_ %s", name[a], s+1, s+3);
            break;
        }
    }

    free_char(tag_list, -1);
    return template_list;
}
-Sequence * swap_header ( Sequence *S, Sequence *H)\r
-{\r
- int a, b, n;\r
-\r
- for ( a=0; a< S->nseq; a++)\r
- {\r
- if ( (n=name_is_in_list (S->name[a],H->name, H->nseq, 1000))!=-1)\r
- {\r
- char **list;\r
-\r
- \r
- list=string2list (H->seq_comment[n]);\r
- if ( list==NULL || atoi(list[0])==1)continue;\r
- S->seq_comment[a]='\0';\r
- sprintf (S->name[a], "%s%s%s",H->name[n], list[1], list[2]);\r
- vfree ( S->seq_comment[a]);S->seq_comment[a]=vcalloc ( strlen (H->seq_comment[n])+1, sizeof (char));\r
- for (b=3; b< atoi(list[0]); b++)S->seq_comment[a]=strcat (S->seq_comment[a], list[b]);\r
- free_char (list, -1);\r
- }\r
- }\r
- return S;\r
-}\r
-\r
-\r
-Sequence * profile_seq2template_seq ( Sequence *S, char *template_file, Fname *F)\r
-{\r
- /*This function fetches potential templates associated with sequences within a profile*/\r
- int i;\r
- Alignment *A;\r
- char *tmp;\r
-\r
- if ( !check_file_exists (template_file)) return S;\r
- tmp=vtmpnam (NULL);\r
- for ( i=0; i< S->nseq; i++)\r
- {\r
- if ( (A=seq2R_template_profile (S, i)))\r
- {\r
- printf_system ("cp %s %s", template_file, tmp);//seq2template over-writes the temnplate file with a list of the templates effectively encounter\r
- A->S=aln2seq (A);\r
- A->S=seq2template_seq (A->S, tmp, F); \r
- if (!A->S)return NULL;\r
- }\r
- }\r
-\r
- return S;\r
-}\r
- \r
-Sequence * seq2template_type(Sequence *Seq)\r
-{\r
- //add template\r
- int a, e;\r
- int s;\r
- struct X_template *S=NULL;\r
- struct X_template *P=NULL;\r
- struct X_template *R=NULL;\r
- struct X_template *G=NULL;\r
- struct X_template *F=NULL;\r
- struct X_template *T=NULL;\r
- struct X_template *E=NULL;\r
- struct X_template *U=NULL;\r
- Alignment *A;\r
-\r
-\r
- e=' ';\r
- for (a=0; a< Seq->nseq; a++)\r
- {\r
- if (!Seq->T[a])continue;\r
- //HERE ADD a Template\r
- P=seq_has_template (Seq, a, "_P_");\r
- S=seq_has_template (Seq, a, "_S_");\r
- R=seq_has_template (Seq, a, "_R_");\r
- G=seq_has_template (Seq, a, "_G_");\r
- F=seq_has_template (Seq, a, "_F_");\r
- T=seq_has_template (Seq, a, "_T_");\r
- E=seq_has_template (Seq, a, "_E_");\r
- U=seq_has_template (Seq, a, "_U_");\r
- \r
- s=(!P)?1:0;\r
- sprintf ( (Seq->T[a])->seq_type, "%c%c%c%c%c%c%c%c", (P)?'P':e, (S)?'S':e, (S &&!P)?'s':e,(R)?'R':e, (G)?'G':e,(T)?'T':e,(E)?'E':e,(U)?'U':e);\r
- \r
- if (R && (A=seq2R_template_profile (Seq,a)))\r
- {\r
- \r
- A->S=seq2template_type ( A->S);\r
- } \r
- }\r
- return Seq;\r
-}\r
-\r
/*
 * Returns the first template tag found in string_in, testing the tags in the
 * order _P_, _S_, _R_, _G_, _F_, _T_, _E_, _U_ (the order of the test, not
 * the position in the string, decides). Returns NULL when string_in is NULL
 * or holds none of the tags. The returned pointer designates a constant
 * string and must not be modified or freed.
 */
char * string_contains_template_tag (char *string_in)
{
    static char *tags[] = { "_P_", "_S_", "_R_", "_G_", "_F_", "_T_", "_E_", "_U_" };
    size_t k;

    if (string_in == NULL) return NULL;

    for (k = 0; k < sizeof tags / sizeof tags[0]; k++) {
        if (strstr(string_in, tags[k])) return tags[k];
    }
    return NULL;
}
-static int check_blast_is_installed (char *server);\r
-\r
-\r
- \r
-static int check_blast_is_installed (char *server)\r
-{\r
- if (strm (server, "EBI"));\r
- else if ( strm (server, "NCBI"))\r
- return check_program_is_installed (NCBIWEBBLAST_4_TCOFFEE,NULL, NULL,NCBIWEBBLAST_ADDRESS, INSTALL_OR_DIE);\r
- else if ( strm (server, "LOCAL"))\r
- return check_program_is_installed (NCBIBLAST_4_TCOFFEE,NULL, NULL,NCBIBLAST_ADDRESS, INSTALL_OR_DIE);\r
- return 1;\r
-}\r
-\r
-\r
-Sequence * vremove_seq_template_files(Sequence *S)\r
-{\r
- return handle_seq_template_file (S, "remove");\r
-}\r
-Sequence * display_seq_template_files(Sequence *S)\r
-{\r
- return handle_seq_template_file (S, "display");\r
-}\r
-Sequence * handle_seq_template_file (Sequence *S, char *mode)\r
-{\r
- int a;\r
- Template *T;\r
- \r
- for (a=0; a< S->nseq; a++)\r
- {\r
- T=S->T[a];\r
- if (T)\r
- {\r
- handle_X_template_files (T->P, mode);\r
- handle_X_template_files (T->F, mode);\r
- handle_X_template_files (T->R, mode);\r
- handle_X_template_files (T->T, mode);\r
- handle_X_template_files (T->E, mode);\r
- }\r
- }\r
- \r
- return S;\r
-}\r
-int handle_X_template_files ( X_template *T, char *mode)\r
- {\r
- if (!T)return 0;\r
- \r
- if ( strm (mode, "remove"))\r
- {\r
- vremove (T->template_file);\r
- vremove (T->template_name);\r
- }\r
- else if (strm (mode, "display"))\r
- {\r
- char buf[100];\r
- sprintf ( buf, "Template %s", template_type2type_name (T->template_type));\r
- if (check_file_exists (T->template_name))display_output_filename ( stdout,buf,T->template_format,T->template_name, STORE); \r
- }\r
- else\r
- {\r
- printf_exit (EXIT_FAILURE, stderr, "\nUnkonwn mode %s for template handling [FATAL:%s]", mode, PROGRAM);\r
- }\r
- return 1;\r
- }\r
-Sequence * seq2template_seq ( Sequence *S, char *template_list, Fname *F)\r
-{\r
- /*Expected format for the template file:\r
- >seq_name _X_ Target_template\r
- X: S for Structures \r
- G for genomes (Exoset)\r
- When alternative templates are given for a sequence, the first one superseeds all the others\r
- */\r
- \r
- /*Fill the sequences*/\r
- /*1: No template*/\r
- char buf[1000];\r
-\r
- int PmC,PmI,PMI;\r
- int BmC,BmI,BMI;\r
- char *server;\r
- char *pdb_db,*prot_db;\r
-\r
- int remove_template_file=0;\r
- \r
- remove_template_file=get_int_variable ("remove_template_file");\r
- server=get_string_variable ("blast_server");\r
- pdb_db=get_string_variable ("pdb_db");\r
- prot_db=get_string_variable ("prot_db");\r
- \r
- PmI=get_int_variable ("pdb_min_sim");\r
- PMI=get_int_variable ("pdb_max_sim");\r
- PmC=get_int_variable ("pdb_min_cov");\r
-\r
- BmI=get_int_variable ("prot_min_sim");\r
- BMI=get_int_variable ("prot_max_sim");\r
- BmC=get_int_variable ("prot_min_cov");\r
- \r
- if ( (template_list && template_list[0]=='\0') || strm ( template_list, "no_template")) \r
- {\r
- return S;\r
- }\r
- else if ( strstr (template_list, "MODE_"))//pre_set mode\r
- {\r
- return seq2template_seq ( S,template_list+strlen ("MODE_"),F);\r
- }\r
- else if ( strm ( template_list, "SSP")|| strm ( template_list, "GOR"))\r
- {\r
- \r
- /*use GOR to Predict the secondary structure*/\r
- check_program_is_installed (GOR4_4_TCOFFEE,NULL, NULL,GOR4_ADDRESS, INSTALL_OR_DIE);\r
- sprintf ( buf, "SCRIPT_tc_generic_method.pl@mode#ssp_template@seq#%s/%s@obs#%s/%s@cache#%s@type#_E_",get_mcoffee_4_tcoffee(), "New_KS.267.seq", get_mcoffee_4_tcoffee(), "New_KS.267.obs", get_cache_dir());\r
- S=seq2template_seq (S,buf, F);\r
- return S;\r
- }\r
- else if ( strm ( template_list, "PSISSP") || strm (template_list, "PSIGOR"))\r
- {\r
- \r
- /*Computes a GOR consensus on a psi-blast output*/\r
- check_program_is_installed (GOR4_4_TCOFFEE,NULL, NULL,GOR4_ADDRESS, INSTALL_OR_DIE);\r
- check_blast_is_installed(server);\r
- \r
- sprintf ( buf, "SCRIPT_tc_generic_method.pl@mode#psissp_template@seq#%s/%s@obs#%s/%s@cache#%s@minid#%d@maxid#%d@mincov#%d@server#%s@type#_E_",get_mcoffee_4_tcoffee(), "New_KS.267.seq", get_mcoffee_4_tcoffee(), "New_KS.267.obs", get_cache_dir(), BmI,BMI,BmC,server);\r
- S=seq2template_seq (S,buf, F);\r
- return S;\r
- }\r
- else if ( strm ( template_list, "TM"))\r
- {\r
- \r
- /*predict transmembrane structure*/\r
- check_program_is_installed (HMMTOP_4_TCOFFEE,NULL, NULL,HMMTOP_ADDRESS, INSTALL_OR_DIE);\r
- sprintf ( buf, "SCRIPT_tc_generic_method.pl@mode#tm_template@arch#%s/%s@psv#%s/%s@type#_T_",get_mcoffee_4_tcoffee(), "hmmtop.arch", get_mcoffee_4_tcoffee(), "hmmtop.psv");\r
- S=seq2template_seq (S,buf, F);\r
- return S;\r
- }\r
- else if ( strm ( template_list, "PSITM"))\r
- {\r
- \r
- /*predict transmembrane structure*/\r
- check_program_is_installed (HMMTOP_4_TCOFFEE,NULL, NULL,HMMTOP_ADDRESS, INSTALL_OR_DIE);\r
- check_blast_is_installed(server);\r
- \r
- sprintf ( buf, "SCRIPT_tc_generic_method.pl@mode#psitm_template@arch#%s/%s@psv#%s/%s@cache#%s@minid#%d@maxid#%d@mincov#%d@server#%s@type#_T_",get_mcoffee_4_tcoffee(), "hmmtop.arch", get_mcoffee_4_tcoffee(), "hmmtop.psv",get_cache_dir(), BmI,BMI,BmC,server);\r
- S=seq2template_seq (S,buf, F);\r
- return S;\r
- }\r
- \r
- else if (strm ( template_list, "PSIBLAST"))\r
- {\r
- check_blast_is_installed(server);\r
- sprintf ( buf, "SCRIPT_tc_generic_method.pl@mode#psiprofile_template@database#%s@method#psiblast@cache#%s@minid#%d@maxid#%d@mincov#%d@server#%s@type#_R_", prot_db,get_cache_dir(),BmI,BMI,BmC,server);\r
- S=seq2template_seq (S,buf, F);\r
- \r
- return S;\r
- }\r
- else if (strm ( template_list, "BLAST") )\r
- {\r
- check_blast_is_installed(server);\r
- sprintf ( buf, "SCRIPT_tc_generic_method.pl@mode#profile_template@database#%s@method#blastp@cache#%s@minid#%d@maxid#%d@mincov#%d@server#%s@type#_R_", prot_db,get_cache_dir(),BmI,BMI,BmC,server);\r
- S=seq2template_seq (S,buf, F);\r
- \r
- return S;\r
- }\r
- else if ( strm ( template_list, "EXPRESSO") || strm (template_list, "PDB"))\r
- {\r
- check_blast_is_installed(server);\r
- \r
- int isRNA = 0;\r
- int i;\r
- for (i= 0; i < S->len[0]; ++i)\r
- {\r
- isRNA = (isRNA || is_rna(S->seq[0][i]));\r
- }\r
- \r
- if (isRNA)\r
- {\r
- sprintf ( buf, "SCRIPT_tc_generic_method.pl@mode#pdb_template@database#%s@method#blastn@cache#%s@minid#%d@maxid#%d@mincov#%d@server#%s@type#_P_",pdb_db, get_cache_dir(),PmI,PMI,PmC, server);\r
- }\r
- else\r
- {\r
- sprintf ( buf, "SCRIPT_tc_generic_method.pl@mode#pdb_template@database#%s@method#blastp@cache#%s@minid#%d@maxid#%d@mincov#%d@server#%s@type#_P_",pdb_db, get_cache_dir(),PmI,PMI,PmC, server);\r
- }\r
- return seq2template_seq (S,buf, F);\r
- }\r
- \r
- else if ( strm (template_list, "RCOFFEE") || strm (template_list, "RNA"))\r
- {\r
- char *file_struc_clac = vtmpnam (NULL);\r
- FILE* struc_calc_f =vfopen(file_struc_clac,"w");\r
- int i;\r
- int j = 0;\r
- for (i = 0; i< S->nseq; ++i)\r
- {\r
- if (S->T[i]->P)\r
- {\r
- ++j;\r
- fprintf(struc_calc_f,"%s %s\n",S->name[i],S->T[i]->P->template_file); \r
- }\r
- }\r
- \r
- vfclose(struc_calc_f);\r
- check_program_is_installed (RNAPLFOLD_4_TCOFFEE,NULL, NULL,RNAPLFOLD_ADDRESS, IS_FATAL);\r
- sprintf ( buf, "SCRIPT_tc_generic_method.pl@mode#RNA_template@type#_F_");\r
- if (j > 0)\r
- {\r
- S = seq2template_seq (S,buf,F);\r
- sprintf ( buf, "SCRIPT_tc_generic_method.pl@mode#calc_rna_template@pdbfile#%s@cache#%s@type#_F_", file_struc_clac,get_cache_dir());\r
- }\r
-// printf("IN T_\n");\r
- return seq2template_seq (S,buf,F);\r
- }\r
- \r
- /*2: Templates from seqnames (SELF) or named like the sequences (SEQFILE)*/\r
- else if ( strstr (template_list, "SELF_") ||strstr (template_list, "SEQFILE_") )\r
- {\r
- int a;\r
- char *p;\r
-\r
- //add template\r
- for (a=0; a< S->nseq; a++)\r
- {\r
-\r
- if ( (p=strstr (template_list,"SELF_")))p=S->name[a];\r
- else if ( strstr (template_list, "SEQFILE_"))p=template_list;\r
- else \r
- {\r
- fprintf ( stderr, "\nUnkown mode for Template [FATAL:%s]\n", PROGRAM);\r
- myexit (EXIT_FAILURE);\r
- }\r
- \r
- if ( strstr (template_list, "_P_") && !(S->T[a])->P)\r
- {\r
- (S->T[a])->P =fill_P_template ( S->name[a], p,S);//PDB\r
- \r
- }\r
- else if ( strstr (template_list, "_S_") && !(S->T[a])->S)(S->T[a])->S =fill_S_template ( S->name[a], p,S);//Sequence \r
- else if ( strstr (template_list, "_R_" )&& !(S->T[a])->R)(S->T[a])->R =fill_R_template ( S->name[a], p,S);//pRofile\r
- else if ( strstr (template_list, "_G_" )&& !(S->T[a])->G)(S->T[a])->G =fill_G_template ( S->name[a], p,S);//Genomic\r
- else if ( strstr (template_list, "_F_" )&& !(S->T[a])->F)(S->T[a])->F =fill_F_template ( S->name[a], p,S);//Fold \r
- else if ( strstr (template_list, "_T_" )&& !(S->T[a])->T)(S->T[a])->T =fill_T_template ( S->name[a], p,S);//Trans Membrane\r
- else if ( strstr (template_list, "_E_" )&& !(S->T[a])->E)(S->T[a])->E =fill_E_template ( S->name[a], p,S);//Secondary Structure\r
- else if ( strstr (template_list, "_U_" )&& !(S->T[a])->U)(S->T[a])->U =fill_U_template ( S->name[a], p,S);//unicode, list template\r
- \r
- }\r
- return S;\r
- }\r
-\r
- /*2: Templates comes in a template_file*/\r
- else if ( template_list==NULL || format_is_fasta (template_list))\r
- {\r
- Sequence *T;\r
- int a, i;\r
- int ntemp=0;\r
- T=(template_list!=NULL)?get_fasta_sequence (template_list, NULL):S;\r
- for (a=0; a< T->nseq; a++)\r
- {\r
- \r
- char *p;\r
- if ((i=name_is_in_list(T->name[a], S->name, S->nseq, MAXNAMES))!=-1)\r
- {\r
- if ( (p=strstr (T->seq_comment[a], " _P_ ")) && !(S->T[i])->P &&( (S->T[i])->P=fill_P_template (S->name[i],p,S)))\r
- {\r
- ntemp++;\r
- }\r
- else if ( (p=strstr (T->seq_comment[a], " _F_ ")) && !(S->T[i])->F &&( (S->T[i])->F=fill_F_template (S->name[i],p,S)))ntemp++;\r
- else if ( (p=strstr (T->seq_comment[a], " _S_ ")) && !(S->T[i])->S &&( (S->T[i])->S=fill_S_template (S->name[i],p,S)))ntemp++;\r
- \r
- else if ( (p=strstr (T->seq_comment[a], " _R_ ")) && !(S->T[i])->R &&( (S->T[i])->R=fill_R_template (S->name[i],p,S)))ntemp++;\r
- else if ( (p=strstr (T->seq_comment[a], " _G_ ")) && !(S->T[i])->G &&( (S->T[i])->G=fill_G_template (S->name[i],p,S)))ntemp++;\r
- else if ( (p=strstr (T->seq_comment[a], " _T_ ")) && !(S->T[i])->T &&( (S->T[i])->T=fill_T_template (S->name[i],p,S)))ntemp++;\r
- else if ( (p=strstr (T->seq_comment[a], " _E_ ")) && !(S->T[i])->E &&( (S->T[i])->E=fill_E_template (S->name[i],p,S)))ntemp++;\r
- else if ( (p=strstr (T->seq_comment[a], " _U_ ")) && !(S->T[i])->U &&( (S->T[i])->E=fill_U_template (S->name[i],p,S)))ntemp++;\r
- \r
- if (T!=S)strcat (S->seq_comment[i], T->seq_comment[a]);\r
- }\r
- }\r
- \r
- if (T!=S)free_sequence (T, -1);\r
- \r
- if ( remove_template_file==2 || ntemp==0)\r
- {\r
- vremove (template_list);\r
- }\r
- else \r
- if (template_list)display_output_filename ( stdout, "Template_List","fasta_seq", template_list, STORE); \r
- return S;\r
- }\r
- \r
- /*3 Templates are generated with a script*/\r
- else if (strstr (template_list, "SCRIPT_") && get_string_variable ("multi_core") && strstr (get_string_variable ("multi_core"), "templates") && get_nproc()>1)\r
- {\r
- char *tmp1,*command;\r
- Alignment *A;\r
- char **temp_file,**seq_file;\r
- int * pid_list, pid, npid, submited;\r
- int nproc, max_nproc;\r
- \r
- char outfile[1000];\r
- static char *script;\r
- static int ntemp;\r
- char *p;\r
- int z, i;\r
- if (!script)script=vcalloc ( 1000, sizeof(char));\r
- \r
- ntemp++;\r
- \r
- command=vcalloc ( 1000, sizeof (char));\r
- tmp1=vtmpnam (NULL); \r
- \r
- A=seq2aln (S,NULL, 0);\r
- string_array_upper(A->seq_al, A->nseq);\r
- output_fasta_seq (tmp1, A);\r
- sprintf ( script, "%s", after_strstr (template_list, "SCRIPT_"));\r
- \r
- if ((p=strstr (template_list, "@type#")))\r
- p+=strlen ("@type#");\r
- if (F)\r
- {\r
- sprintf (outfile, "%s%s_%s%d.template_list", F->path,F->name,template_type2short_type_name(p),ntemp);\r
- }\r
- else\r
- {\r
- F=parse_fname (S->file[0]);\r
- sprintf (outfile, "%s%s_%s%d.template_list",F->path, F->name,template_type2short_type_name(p),ntemp);\r
- free_fname (F);\r
- }\r
- \r
- nproc=get_nproc();\r
- max_nproc=2*nproc;\r
- \r
- script=substitute(script, "@", " -");\r
- script=substitute(script, "#", "=");\r
- \r
- temp_file=vcalloc ( A->nseq, sizeof (char*));\r
- seq_file =vcalloc (A->nseq, sizeof (char*)); \r
- pid_list =vcalloc (MAX_N_PID, sizeof (int *));\r
- \r
- fprintf ( stderr, "\n\t------ Fetch %Templates [Multi Core Mode %d CPUs]\n",get_nproc());\r
- for (npid=0, submited=0,i=0; i<S->nseq; i++)\r
- {\r
- FILE *fp2;\r
- seq_file[i]=vtmpnam (NULL);\r
- temp_file[i]=vtmpnam (NULL);\r
- fp2=vfopen (seq_file[i], "w");\r
- fprintf ( fp2, ">%s\n%s\n", S->name[i], S->seq[i]);\r
- vfclose (fp2);\r
- \r
- pid=vfork();\r
- if (pid==0)\r
- {\r
- initiate_vtmpnam (NULL);\r
- if ( strstr (script, "tc_generic_method"))\r
- {\r
- //sprintf ( command, "%s -other_pg %s -infile=%s -outfile=%s -tmpdir=%s",get_string_variable ("t_coffee"),script,seq_file[i],temp_file[i],get_tmp_4_tcoffee());\r
- sprintf ( command, "%s -infile=%s -outfile=%s -tmpdir=%s",script,seq_file[i],temp_file[i],get_tmp_4_tcoffee());\r
- if (strstr (command, "EBI"))get_email ();\r
- }\r
- else \r
- //sprintf ( command, "%s -other_pg %s -infile=%s -outfile=%s",get_string_variable("t_coffee"),script,seq_file[i],temp_file[i]);\r
- sprintf ( command, "%s -infile=%s -outfile=%s",script,seq_file[i],temp_file[i]);\r
- command=substitute(command, "@", " ");\r
- my_system ( command);\r
- exit (EXIT_SUCCESS);\r
- }\r
- else\r
- {\r
- pid_list[pid]=npid;\r
- set_pid(pid);\r
- npid++;\r
- submited++;\r
- submited=vwait_npid(submited,max_nproc,nproc);\r
- }\r
- }\r
- \r
- submited=vwait_npid(submited,0,0);\r
- //Concatenate all the files\r
- vremove (outfile);\r
- for (i=0; i<npid; i++) file_cat (temp_file[i],outfile);\r
- \r
- //Free the process table\r
- vfree (temp_file);\r
- vfree (pid_list);\r
- vfree (seq_file);\r
- \r
- free_aln (A);\r
- if ( check_file_exists (outfile) && format_is_fasta(outfile))\r
- {\r
- S=seq2template_seq (S, outfile, F);\r
- }\r
- else if (strstr (command, "webblast.pl"))return S; \r
- else \r
- {\r
- \r
- add_warning (stderr, "\nWARNING: Could not Run %s to find templates[%s]\n",command, PROGRAM);\r
- return NULL;\r
- }\r
- \r
- vfree (command);\r
- return S;\r
- }\r
- \r
- else if (strstr (template_list, "SCRIPT_"))\r
- {\r
- char x[299];\r
- char *tmp1,*command;\r
- Alignment *A;\r
- char outfile[1000];\r
- static char *script;\r
- static int ntemp;\r
- char *p;\r
- int z;\r
- if (!script)script=vcalloc ( 1000, sizeof(char));\r
- \r
- ntemp++;\r
- \r
- command=vcalloc ( 1000, sizeof (char));\r
- tmp1=vtmpnam (NULL); \r
- \r
- A=seq2aln (S,NULL, 0);\r
- string_array_upper(A->seq_al, A->nseq);\r
- output_fasta_seq (tmp1, A);\r
- sprintf ( script, "%s", after_strstr (template_list, "SCRIPT_"));\r
- fprintf ( stderr, "\n");\r
- if ((p=strstr (template_list, "@type#")))\r
- p+=strlen ("@type#");\r
- if (F)\r
- {\r
- sprintf (outfile, "%s%s_%s%d.template_list", F->path,F->name,template_type2short_type_name(p),ntemp);\r
- }\r
- else\r
- {\r
- F=parse_fname (S->file[0]);\r
- sprintf (outfile, "%s%s_%s%d.template_list",F->path, F->name,template_type2short_type_name(p),ntemp);\r
- free_fname (F);\r
- }\r
- \r
- script=substitute(script, "@", " -");\r
- script=substitute(script, "#", "=");\r
- \r
- if ( strstr (script, "tc_generic_method"))\r
- {\r
- sprintf ( command, "%s -other_pg %s -infile=%s -outfile=%s -tmpdir=%s",get_string_variable ("t_coffee"),script, tmp1,outfile,get_tmp_4_tcoffee());\r
- if (strstr (command, "EBI"))get_email ();\r
- }\r
- else sprintf ( command, "%s -other_pg %s -infile=%s -outfile=%s",get_string_variable("t_coffee"),script, tmp1, outfile);\r
- \r
- vremove (outfile);\r
- command=substitute(command, "@", " ");\r
- \r
- my_system ( command);\r
- \r
- free_aln (A);\r
- \r
- if ( check_file_exists (outfile) && format_is_fasta(outfile))\r
- {\r
- S=seq2template_seq (S, outfile, F);\r
- }\r
- else if (strstr (command, "webblast.pl"))return S; \r
- else \r
- {\r
- \r
- add_warning (stderr, "\nWARNING: Could not Run %s to find templates[%s]\n",command, PROGRAM);\r
- return NULL;\r
- }\r
- \r
- vfree (command);\r
- return S;\r
- }\r
- \r
- return S;\r
-}\r
-\r
-char* seq2template_file (Sequence *S, char *file)\r
-{\r
- Alignment *A;\r
- int i;\r
- \r
- if (file==NULL)file=vtmpnam (NULL);\r
- \r
- seq2template_file2 (S, file, "w");\r
- for (i=0; i<S->nseq; i++)\r
- if ( (A=seq2R_template_profile (S, i)))\r
- {\r
- seq2template_file2 (A->S, file, "a");\r
- }\r
- return file;\r
-}\r
- \r
-int seq2template_file2 (Sequence *S, char *file, char *mode)\r
-{\r
- FILE *fp;\r
- int i;\r
- char buf1[10000];\r
- char buf2[10000];\r
- struct X_template *X;\r
- \r
- fp=vfopen ( file, mode);\r
- for ( i=0; i< S-> nseq; i++)\r
- {\r
- buf1[0]=0;\r
- if ( (X=(S->T[i])->P)){sprintf (buf2, " %s %s ", X->template_type, X->template_file);strcat (buf1, buf2);}\r
- /*if ( (X=(S->T[i])->S)){sprintf (buf2, " %s %s ", X->template_type, X->template_file);strcat (buf1, buf2);}*/\r
- if ( (X=(S->T[i])->R)){sprintf (buf2, " %s %s ", X->template_type, X->template_file);strcat (buf1, buf2);}\r
- if ( (X=(S->T[i])->G)){sprintf (buf2, " %s %s ", X->template_type, X->template_file);strcat (buf1, buf2);}\r
- if (buf1[0])fprintf ( fp, ">%s %s\n", S->name[i], buf1);\r
- }\r
- vfclose (fp);\r
- return EXIT_SUCCESS;\r
-}\r
- \r
- \r
- \r
- \r
-int seq2n_X_template ( Sequence *S, char *type)\r
-{\r
- int a, n;\r
- \r
- for (n=0,a=0; a< S->nseq; a++)\r
- {\r
- if ( strm2 (type, "_P_","_*_") && (S->T[a])->P)n++;\r
- if ( strm2 (type, "_F_","_*_") && (S->T[a])->F)n++;\r
- if ( strm2 (type, "_S_","_*_") && (S->T[a])->S)n++;\r
- if ( strm2 (type, "_R_","_*_") && (S->T[a])->R)n++;\r
- if ( strm2 (type, "_G_","_*_") && (S->T[a])->G)n++;\r
- }\r
- return n;\r
-}\r
-struct X_template *fill_X_template ( char *name, char *p, char *token)\r
-{\r
- struct X_template *X;\r
-\r
-\r
-\r
-\r
- char *k;\r
- \r
- X=vcalloc (1, sizeof (X_template));\r
- sprintf ( X->seq_name, "%s", name);\r
- if ( (k=strstr (p, token)))sscanf (k+strlen(token), "%s",X->template_name);\r
- else sprintf (X->template_name, "%s", p);\r
- \r
- \r
- /*Add a Structure HERE*/\r
- sprintf ( X->template_type, "%s", token);\r
- if ( strm (token, "_P_"))X->VP=vcalloc (1, sizeof (P_template));\r
- if ( strm (token, "_F_"))X->VF=vcalloc (1, sizeof (F_template));\r
- \r
- if ( strm (token, "_S_"))X->VS=vcalloc (1, sizeof (S_template));\r
- if ( strm (token, "_R_"))X->VR=vcalloc (1, sizeof (R_template));\r
- if ( strm (token, "_G_"))X->VG=vcalloc (1, sizeof (G_template));\r
- if ( strm (token, "_T_"))X->VT=vcalloc (1, sizeof (T_template));\r
- if ( strm (token, "_E_"))X->VE=vcalloc (1, sizeof (E_template));\r
- if ( strm (token, "_U_"))X->VU=vcalloc (1, sizeof (U_template));\r
- \r
- return X;\r
-}\r
-\r
-struct X_template* free_X_template ( struct X_template *X)\r
-{\r
- if (X->VP)\r
- {\r
- vfree (X->VP);\r
- }\r
- if (X->VF)\r
- {\r
- vfree (X->VF);\r
- }\r
- if ( X->VS)\r
- {\r
- free_sequence ((X->VS)->S, -1);\r
- vfree (X->VS);\r
- }\r
- if ( X->VR)\r
- {\r
- free_aln ((X->VR)->A);\r
- vfree (X->VR);\r
- }\r
- if ( X->VG)\r
- {\r
- free_sequence ((X->VG)->S, -1);\r
- vfree (X->VG);\r
- }\r
- \r
- vfree (X);\r
- return NULL;\r
-}\r
-\r
-FILE * display_sequence_templates (Sequence *S,int i, FILE *io)\r
-{\r
- \r
-\r
- io=display_X_template ( (S->T[i])->P, io);\r
-\r
- io=display_X_template ( (S->T[i])->F, io);\r
-\r
- io=display_X_template ( (S->T[i])->S, io);\r
-\r
- io=display_X_template ( (S->T[i])->R, io);\r
- io=display_X_template ( (S->T[i])->G, io);\r
- io=display_X_template ( (S->T[i])->T, io);\r
- io=display_X_template ( (S->T[i])->E, io);\r
-\r
- return io;\r
-}\r
- \r
-FILE * display_X_template (struct X_template *X, FILE *io)\r
-{\r
- \r
- if ( !X) return io;\r
- if ( !strm (X->template_type, "_S_"))fprintf (io, "\n\t%s: Template=%s, File=%s",template_type2type_name (X->template_type), X->template_name,X->template_file);\r
- return io;\r
-}\r
/* Map a template tag to its short label.
 *
 * Returns "" for a NULL type.  Otherwise the first tag of the table below
 * that occurs anywhere in `type` decides the label; when none occurs, `type`
 * itself is returned.  The table order is significant when `type` holds
 * several tags.
 */
char *template_type2short_type_name (char *type)
{
  //add_template
  static const struct
  {
    const char *tag;
    char *label;
  } table[]=
    {
      {"_P_", "pdb"},
      {"_F_", "rfold"},
      {"_S_", "seq"},
      {"_R_", "prf"},
      {"_G_", "genome"},
      {"_E_", "ssp"},
      {"_T_", "tmp"},
      {"_U_", "unicode"}
    };
  int k;
  int n_entries=(int)(sizeof (table)/sizeof (table[0]));

  if (type==NULL) return "";

  for (k=0; k<n_entries; k++)
    {
      if (strstr (type, table[k].tag)) return table[k].label;
    }
  return type;
}
/* Map a template tag to its human-readable label.
 *
 * Returns "" for a NULL type (same convention as
 * template_type2short_type_name; this function used to pass NULL to
 * strstr).  Otherwise the first tag, in the order tested below, that occurs
 * anywhere in `type` decides the label; when none occurs, `type` itself is
 * returned.
 *
 * The "_S_" label was misspelled "Sequeence" and is now "Sequence".
 */
char *template_type2type_name (char *type)
{
  //add_template
  if (!type) return "";
  else if ( strstr (type, "_P_")) return "PDB struc";
  else if ( strstr (type, "_F_")) return "RNA Fold";
  else if ( strstr (type, "_S_")) return "Sequence";
  else if ( strstr (type, "_R_")) return "Profile";
  else if ( strstr (type, "_G_")) return "Genomic";
  else if ( strstr (type, "_E_")) return "Protein Secondary Structure";
  else if ( strstr (type, "_T_")) return "Protein Trans Membrane Structure ";
  else if ( strstr (type, "_U_")) return "Unicode and strings";
  else return type;
}
-struct X_template *fill_F_template ( char *name,char *p, Sequence *S)\r
-{\r
- /*Profile template*/\r
- struct X_template *F;\r
- \r
- F=fill_X_template ( name, p, "_F_"); \r
- sprintf (F->template_format , "TCOFFEE_LIBRARY");\r
- if (!F || !check_file_exists (F->template_name))\r
- {\r
- fprintf ( stderr, "\nWARNING: Could Not Fill _F_ (Fold) template for sequence |%s|", name);\r
- free_X_template (F);\r
- return NULL;\r
- }\r
- else if ( check_file_exists (F->template_name))\r
- {\r
- sprintf ( F->template_file, "%s", F->template_name);\r
- }\r
- \r
- return F;\r
-\r
-}\r
- \r
-\r
-struct X_template *fill_P_template ( char *name,char *p, Sequence *S)\r
-{\r
- struct X_template *P;\r
- Sequence *PS;\r
- Alignment *A;\r
- int sim, cov, i;\r
- char *buf;\r
- \r
- \r
- P=fill_X_template ( name, p, "_P_"); \r
- sprintf (P->template_format , "pdb");\r
- \r
- if (!P)\r
- {\r
- //fprintf ( stderr, "\nWARNING: Could Not Fill _P_ template for sequence |%s|", name);\r
- free_X_template (P);\r
- return NULL;\r
- }\r
- else if ( check_file_exists (P->template_name))\r
- {\r
- \r
- sprintf ( P->template_file, "%s", P->template_name);\r
- buf=path2filename (P->template_name);\r
- if (P->template_name!=buf)\r
- {\r
- sprintf ( P->template_name, "%s",buf );\r
- vfree (buf);\r
- }\r
- }\r
- else\r
- {\r
- char *st;\r
- \r
- \r
- st=is_pdb_struc (P->template_name);\r
- if (st)\r
- {\r
- if (st!=P->template_file)sprintf ( P->template_file, "%s", st);\r
- }\r
- }\r
- \r
- /*Make a first run to fix relaxed PDB files*/\r
- buf=fix_pdb_file (P->template_file);\r
- \r
- if ( buf!=P->template_file)\r
- {\r
- \r
- sprintf ( P->template_file, "%s",buf);\r
- vfree (buf);\r
- }\r
- \r
- /*Check the PDB FILE EXISTS*/\r
- if (!is_pdb_file (P->template_file))\r
- {\r
-\r
- add_warning(stderr, "\nWARNING: _P_ Template |%s| Could Not Be Found\n",p);\r
- free_X_template (P);\r
- return NULL;\r
- }\r
- else\r
- {\r
- buf= get_pdb_id (P->template_file);\r
- if (buf!=(P->VP)->pdb_id)\r
- {\r
- sprintf ((P->VP)->pdb_id, "%s", buf);\r
- vfree (buf);\r
- }\r
- }\r
-\r
- /*Check the target sequence is similar enough*/\r
- \r
- PS=get_pdb_sequence (P->template_file);\r
- \r
-\r
- if ( PS==NULL)\r
- {\r
- add_warning( stderr, "\nWARNING: _P_ Template |%s| Could Not be Used for Sequence |%s|: Structure Not Found", P->template_name, name);\r
- free_X_template (P);P=NULL;\r
- }\r
- else\r
- {\r
- int minsim=get_int_variable ("pdb_min_sim");\r
- int mincov=get_int_variable ("pdb_min_cov");\r
-\r
- \r
- i=name_is_in_list (name, S->name, S->nseq, 100);\r
- \r
- A=align_two_sequences (S->seq[i], PS->seq[0],"idmat",-3,0, "fasta_pair_wise");\r
- cov=aln2coverage (A, 0);\r
- sim=aln2sim (A, "idmat");\r
- \r
- if (sim<minsim)\r
- {\r
- add_warning( stderr, "\nWARNING: _P_ Template %s Could Not be Used for Sequence %s: Similarity too low [%d, Min=%d]",P->template_name,name, sim, minsim);\r
- free_X_template (P);\r
- P=NULL;\r
- }\r
- else if ( cov<mincov)\r
- {\r
- add_warning( stderr, "\nWARNING: _P_ Template |%s| Could Not be Used for Sequence |%s|: Coverage too low [%d, Min=%d]", P->template_name,name, cov, mincov);\r
- free_X_template (P);P=NULL;\r
- }\r
- free_aln(A);\r
- free_sequence (PS, -1);\r
- }\r
- \r
- return P;\r
-}\r
-\r
-struct X_template *fill_S_template ( char *name,char *p, Sequence *Seq)\r
-{\r
- struct X_template *S;\r
- S=fill_X_template ( name, p, "_S_"); \r
- if ( strm (name, p))sprintf ( S->template_file, "%s",output_fasta_seqX (NULL,"w",Seq,NULL, seq_name2index (name, Seq)));\r
- (S->VS)->S=get_fasta_sequence (S->template_file, NULL);\r
- return S;\r
-}\r
-struct X_template *fill_R_template ( char *name,char *p, Sequence *S)\r
-{\r
- /*Profile template*/\r
- struct X_template *R;\r
- \r
-\r
- R=fill_X_template ( name, p, "_R_");\r
- sprintf (R->template_format , "fasta_aln");\r
- \r
- \r
- if (!is_aln(R->template_name) && !is_seq (R->template_name))\r
- {\r
- \r
- add_warning ( stderr, "\nWARNING: _R_ Template %s Could Not Be Found\n",R->template_name);\r
- free_X_template (R);\r
- return NULL;\r
- }\r
- else\r
- {\r
- int s;\r
- Sequence *S1;\r
- Alignment *A1;\r
- \r
- (R->VR)->A=main_read_aln (R->template_name, NULL);\r
- \r
- if ( !S)\r
- sprintf ( R->template_file, "%s", R->template_name);\r
- else\r
- {\r
- s=name_is_in_list(name, S->name, S->nseq, 100);\r
- if ( s!=-1)\r
- {\r
- S1=fill_sequence_struc (1, &S->seq[s], &S->name[s]);\r
- A1=seq2aln (S1,NULL, RM_GAP);\r
- \r
- (R->VR)->A=trim_aln_with_seq (A1, (R->VR)->A);\r
- \r
- sprintf ( R->template_file, "%s", vtmpnam (NULL));\r
- output_clustal_aln (R->template_file, (R->VR)->A);\r
- }\r
- else\r
- sprintf ( R->template_file, "%s", R->template_name);\r
- }\r
- (R->VR)->A=aln2profile ((R->VR)->A);\r
- }\r
- return R;\r
-}\r
-\r
-struct X_template *fill_T_template ( char *name,char *p, Sequence *S)\r
-{\r
- /*Profile template*/\r
- struct X_template *T;\r
- \r
- T=fill_X_template ( name, p, "_T_");\r
- sprintf (T->template_format , "fasta_seq");\r
- \r
- if (!is_aln(T->template_name) && !is_seq (T->template_name))\r
- {\r
- \r
- add_warning ( stderr, "\nWARNING: _T_ Template %s Could Not Be Found\n",T->template_name);\r
- free_X_template (T);\r
- return NULL;\r
- }\r
- else\r
- {\r
- \r
- (T->VT)->S=main_read_seq(T->template_name);\r
- sprintf ( T->template_file, "%s", T->template_name);\r
- }\r
- return T;\r
-}\r
-//add template\r
-struct X_template *fill_U_template ( char *name,char *p, Sequence *S)\r
-{\r
- /*Profile template*/\r
- struct X_template *U;\r
- \r
- U=fill_X_template ( name, p, "_U_");\r
- sprintf (U->template_format , "string list");\r
- \r
- if (!check_file_exists(U->template_name))\r
- {\r
- add_warning ( stderr, "\nWARNING: _U_ Template %s Could Not Be Found\n",U->template_name);\r
- free_X_template (U);\r
- return NULL;\r
- }\r
- else\r
- {\r
- //(U->VU)->list=file2string(U->template_name);\r
- sprintf ( U->template_file, "%s", U->template_name);\r
- }\r
- return U;\r
-}\r
-struct X_template *fill_E_template ( char *name,char *p, Sequence *S)\r
-{\r
- /*Profile template*/\r
- struct X_template *E;\r
- \r
-\r
- E=fill_X_template ( name, p, "_E_");\r
- sprintf (E->template_format , "fasta_seq");\r
- \r
- if (!is_aln(E->template_name) && !is_seq (E->template_name))\r
- {\r
- \r
- add_warning ( stderr, "\nWARNING: _E_ Template %s Could Not Be Found\n",E->template_name);\r
- free_X_template (E);\r
- return NULL;\r
- }\r
- else\r
- {\r
- (E->VE)->S=main_read_seq (E->template_name);\r
- sprintf ( E->template_file, "%s", E->template_name);\r
- }\r
- return E;\r
-}\r
-struct X_template *fill_G_template ( char *name,char *p, Sequence *S)\r
-{\r
- struct X_template *G;\r
- G=fill_X_template ( name, p, "_G_"); \r
- sprintf (G->template_format , "fasta_seq");\r
- \r
- /*1: Get the sequence from another file if needed*/\r
- if ( strm (name, p))sprintf ( G->template_file, "%s",output_fasta_seqX (NULL,"w",S,NULL, seq_name2index (name, S)));\r
- else if ( strstr (p, "SEQFILE_"))\r
- {\r
- Sequence *ST;\r
- int i2;\r
- \r
- \r
- ST=main_read_seq (after_strstr ( p,"SEQFILE_G_"));\r
- \r
- i2=seq_name2index (name, ST);\r
- if ( i2!=-1)\r
- {\r
- sprintf ( G->template_file, "%s",output_fasta_seqX (NULL,"w",ST,NULL, i2));\r
- sprintf ( G->template_name, "%s", name);\r
- }\r
- free_sequence (ST, -1);\r
- }\r
- else sprintf (G->template_file, "%s", G->template_name);\r
- \r
- \r
- /*2: Put the template in VG->S*/\r
- if (!is_seq (G->template_file))\r
- {\r
- add_warning ( stderr, "\nWARNING: _G_ Template %s Could Not Be Found \n",p);\r
-\r
- free_X_template (G);\r
- return NULL;\r
- }\r
- else\r
- {\r
- (G->VG)->S=get_fasta_sequence (G->template_file, NULL);\r
- }\r
- return G;\r
-}\r
-\r
-\r
-char *seq2T_value ( Sequence *S, int n, char *value, char *type)\r
-{\r
- static char *rv_buf;\r
- X_template *X;\r
-\r
- if ( !rv_buf)rv_buf=vcalloc (100, sizeof(char));\r
- if (!(X=seq_has_template (S, n, type)))return NULL;\r
- else\r
- {\r
- if (strm (value, "template_file"))return X->template_file;\r
- else if ( strm (value, "template_name"))return X->template_name;\r
- else if ( strm (value, "seq_name"))return X->seq_name;\r
- else if (strm (type, "_P_"))\r
- {\r
- if ( strm (value, "pdb_id"))return (X->VP)->pdb_id;\r
- }\r
- else if ( strm (type, "_R_"))\r
- {\r
- if ( strm (value, "A"))\r
- {\r
- if ((X->VR)->A){sprintf ( rv_buf, "%d", (int)(X->VR)->A);return rv_buf;}\r
- else return NULL;\r
- }\r
- }\r
- \r
- }\r
- return NULL;\r
-}\r
-char *seq2P_pdb_id (Sequence *S, int n)\r
-{\r
- if (!S->T || !S->T[n] || !(S->T[n])->P ) return NULL;\r
- else return ((S->T[n])->P)->template_name;\r
-}\r
-\r
- \r
-char *seq2P_template_file(Sequence *S, int n)\r
-{\r
- \r
- return seq2T_value (S, n, "template_file", "_P_");\r
-}\r
-\r
-char *profile2P_template_file (Sequence *S, int n)\r
-{\r
- Alignment *A;\r
- int a;\r
- char *p;\r
- \r
- if ( !(A=seq2R_template_profile (S, n)))return NULL;\r
- for (a=0; a<A->nseq; a++)\r
- {\r
- if ((p=seq2P_template_file (A->S, a))!=NULL)return p;\r
- }\r
- return NULL;\r
-}\r
-Alignment * seq2R_template_profile (Sequence *S, int n)\r
-{\r
- \r
- \r
- return (Alignment *)atop(seq2T_value (S, n, "A", "_R_"));\r
-}\r
-char * seq2E_template_string (Sequence *S, int n)\r
-{\r
- struct X_template *T;\r
- \r
- if ( (T=seq_has_template (S, n, "_E_"))!=NULL)\r
- return ((T->VE)->S)->seq[0];\r
- else \r
- return NULL;\r
-}\r
-//add template\r
-int* seq2U_template (Sequence *S, int n)\r
-{\r
- struct X_template *T;\r
- \r
- if ( (T=seq_has_template (S, n, "_U_"))!=NULL)\r
- return (T->VU)->list;\r
- else \r
- return NULL;\r
-}\r
-char * seq2T_template_string (Sequence *S, int n)\r
-{\r
- struct X_template *T;\r
- \r
- if ( (T=seq_has_template (S, n, "_T_"))!=NULL)\r
- return ((T->VT)->S)->seq[0];\r
- else \r
- return NULL;\r
-}\r
-\r
-struct X_template* seq_has_template ( Sequence *S, int n, char *mode)\r
-{\r
- Template *T;\r
- \r
- if ( !S || !mode) return NULL;\r
- else if ( n<0 || n>=S->nseq)return NULL;\r
- else if ( !(S->T)) return NULL;\r
- else if ( !(S->T[n]))return NULL;\r
-\r
- T=S->T[n];\r
- //ADD STRUCTURE\r
- //add template\r
- if ( strm (mode, "_P_"))return T->P;\r
- else if ( strm (mode, "_F_"))return T->F;\r
- else if ( strm (mode, "_S_"))return T->S;\r
- else if ( strm (mode, "_R_"))return T->R;\r
- else if ( strm (mode, "_T_"))return T->T;\r
- else if ( strm (mode, "_E_"))return T->E;\r
- else if ( strm (mode, "_U_"))return T->U;\r
- else if ( strm (mode, "_G_"))return T->G;\r
- else return NULL;\r
-} \r
-\r
-char ** name2random_subset (char **in_name, int n_in, int n_out)\r
-{\r
- char **out_name;\r
- \r
- int **list;\r
- int a,max;\r
- \r
- \r
- vsrand (0);\r
- max=n_in*10000;\r
- out_name=declare_char (n_out,MAXNAMES+1 );\r
- list=declare_int (n_in, 2);\r
- \r
- for (a=0; a<n_in; a++)\r
- {\r
- list[a][0]=a;\r
- list[a][1]=rand ()%max;\r
- }\r
- sort_int ( list,2, 1, 0, n_in-1);\r
- \r
- for ( a=0; a<n_out; a++)\r
- sprintf ( out_name[a], "%s", in_name[list[a][0]]);\r
- free_int (list, -1);\r
- return out_name;\r
-}\r
- \r
-Alignment * aln2random_order (Alignment *A)\r
-{\r
- \r
- char **name_list;\r
- \r
- name_list=name2random_subset (A->name, A->nseq, A->nseq);\r
- A=reorder_aln (A, name_list, A->nseq);\r
- free_char (name_list, -1);\r
- return A;\r
-}\r
-Alignment *aln2jacknife (Alignment *A, int nseq, int len)\r
-{\r
- int a, b;\r
- \r
- if (nseq!=0 && nseq<A->nseq)\r
- {\r
- char **name;\r
- \r
- name=name2random_subset (A->name, A->nseq, nseq);\r
- A=reorder_aln (A, name, nseq);\r
- free_char (name, -1);\r
- }\r
- \r
- if (len!=0 && len<A->len_aln)\r
- {\r
- int **l;\r
- Alignment *B;\r
- \r
- l=declare_int (A->len_aln, 2);\r
- for (a=0; a< A->len_aln; a++)\r
- {\r
- l[a][0]=a; \r
- l[a][1]=rand()%(A->len_aln*1000);\r
- }\r
- sort_int ( l,2, 1, 0, A->len_aln-1);\r
- B=copy_aln (A, NULL);\r
- for ( a=0; a< len; a++)\r
- {\r
- for ( b=0; b<A->nseq; b++)\r
- {\r
- A->seq_al[b][a]=B->seq_al[b][l[a][0]];\r
- }\r
- }\r
- for (b=0; b<A->nseq; b++)A->seq_al[b][len]='\0';\r
- free_aln (B);\r
- free_int (l, -1);\r
- }\r
- return A;\r
-}\r
-Alignment * aln2scramble_seq (Alignment *A)\r
-{\r
- int **list;\r
- char **name_list;\r
- int a,max;\r
-\r
- max=100*A->nseq;\r
- vsrand (0);\r
- \r
- list=declare_int (A->nseq, 2);\r
- name_list=vcalloc (A->nseq, sizeof (char*));\r
- \r
- \r
- for (a=0; a<A->nseq; a++)\r
- {\r
- list[a][0]=a;\r
- list[a][1]=rand ()%max;\r
- }\r
- sort_int ( list,2, 1, 0, A->nseq-1);\r
- \r
- for ( a=0; a< A->nseq; a++)\r
- name_list[a]=A->seq_al[a];\r
- for (a=0; a<A->nseq; a++)\r
- {\r
- A->seq_al[a]=name_list[list[a][0]];\r
- }\r
- vfree (name_list);\r
- free_int (list, -1);\r
- return aln2random_order (A);\r
-}\r
- \r
- \r
-\r
-Alignment * reorder_aln ( Alignment *A, char **name, int nseq)\r
- {\r
- int a,sn;\r
- Alignment *BUF;\r
- int n=0;\r
- int *tpp_int;\r
- \r
- if ( name==NULL)return aln2random_order(A);\r
- \r
- \r
- BUF=copy_aln ( A,NULL); \r
- for ( a=0; a<nseq; a++)\r
- {\r
- sn =name_is_in_list ( name[a],BUF->name, A->nseq,STRING);\r
- if ( sn==-1)\r
- {\r
- ;\r
- }\r
- else\r
- {\r
- \r
- \r
- SWAPP(A->order[n], BUF->order[sn], tpp_int);\r
- sprintf ( A->name[n], "%s", BUF->name[sn]); \r
- sprintf ( A->seq_al[n], "%s",BUF->seq_al[sn]);\r
- sprintf ( A->seq_comment[n], "%s", BUF->seq_comment[sn]);\r
- \r
- n++;\r
- \r
- }\r
- }\r
- \r
- for ( a=n; a< A->nseq; a++)A->name[a][0]=A->seq_al[a][0]='\0';\r
- A->nseq=n;\r
- \r
- if ( A->A)A->A=reorder_aln(A->A, name, nseq);\r
- free_aln (BUF);\r
- return A;\r
- } \r
-Sequence * reorder_seq_2 ( Sequence *A, int **order,int field, int nseq)\r
- {\r
- char **name;\r
- int a;\r
- \r
- if (!A || !order) return A;\r
- name=declare_char (A->nseq, 100);\r
- for (a=0; a<nseq; a++)\r
- sprintf ( name[a], "%s", A->name[order[a][field]]);\r
- A=reorder_seq (A, name,nseq);\r
- free_char (name, -1);\r
- return A;\r
- }\r
-Sequence * reorder_seq ( Sequence *A, char **name, int nseq)\r
- {\r
- int a,sn;\r
- Sequence *nA;\r
-\r
- \r
- nA=duplicate_sequence (A);\r
- \r
- \r
- for ( a=0; a< nseq; a++)\r
- {\r
- sn=name_is_in_list (name[a] ,nA->name, nA->nseq, 100);\r
- if (sn==-1)continue;\r
- \r
- if ( nA->file) sprintf ( A->file[a], "%s", nA->file[sn]);\r
- \r
- if ( nA->seq_comment)sprintf ( A->seq_comment[a], "%s", nA->seq_comment[sn]);\r
- if ( nA->aln_comment)sprintf ( A->aln_comment[a], "%s", nA->aln_comment[sn]);\r
- sprintf ( A->seq[a], "%s", nA->seq[sn]);\r
- A->len[a]=nA->len[sn];\r
- sprintf ( A->name[a], "%s", nA->name[sn]);\r
- A->T[a][0]=nA->T[sn][0];\r
- }\r
- A->nseq=nseq;\r
- free_sequence (nA, nA->nseq);\r
- \r
- return A;\r
-} \r
-\r
-char * concatenate_seq ( Sequence *S, char *conc, int *order)\r
- {\r
- int a;\r
- \r
- vfree (conc);\r
- conc=vcalloc ( S->nseq*S->max_len, sizeof (char));\r
-\r
- for ( a=0; a< S->nseq; a++)\r
- {\r
- conc=strcat ( conc, S->seq[order[a]]);\r
- }\r
- return conc;\r
-\r
- }\r
-\r
-\r
- \r
-\r
-Alignment * rotate_aln ( Alignment *A, char *name)\r
-{\r
- Alignment *B;\r
- int a, b;\r
- \r
- B=declare_aln2 (A->len_aln, A->nseq+1);\r
- for ( a=0; a< A->nseq; a++)\r
- for ( b=0; b< A->len_aln; b++)\r
- {\r
- B->seq_al[b][a]=A->seq_al[a][b];\r
- }\r
- for (a=0; a< A->len_aln; a++)\r
- if (name && name[0])sprintf ( B->name[a], "%s_%s%d", name, (a<9)?"0":"",a+1);\r
- else\r
- sprintf ( B->name[a], "%d", a+1);\r
- \r
- \r
- for (a=0; a< A->len_aln; a++)B->seq_al[a][A->nseq]='\0';\r
- B->len_aln=A->nseq;\r
- B->nseq=A->len_aln;\r
- /*free_aln (A);*/\r
- return B;\r
-}\r
- \r
-Alignment * invert_aln ( Alignment *A)\r
-{\r
- char *buf;\r
- int l, a, b, c;\r
- \r
- for ( a=0; a< A->nseq; a++)\r
- {\r
- l=strlen ( A->seq_al[a]);\r
- buf=vcalloc ( l+1,sizeof (char) );\r
- \r
- for ( c=l-1,b=0; b< l; b++, c--)\r
- {\r
- buf[c]=A->seq_al[a][b];\r
- }\r
- buf[l]='\0';\r
- sprintf ( A->seq_al[a], "%s", buf);\r
- }\r
- vfree(buf);\r
- return A;\r
-}\r
/* Complement a nucleotide string in place (a<->t, g<->c, case preserved,
 * every other character untouched), then hand it to invert_string() and
 * return that function's result.
 */
char * complement_string (char *s)
{
  int len=strlen (s);
  int k;

  for (k=0; k<len; k++)
    {
      switch (s[k])
        {
        case 'a': s[k]='t'; break;
        case 'A': s[k]='T'; break;
        case 't': s[k]='a'; break;
        case 'T': s[k]='A'; break;
        case 'g': s[k]='c'; break;
        case 'G': s[k]='C'; break;
        case 'c': s[k]='g'; break;
        case 'C': s[k]='G'; break;
        default: break;
        }
    }

  return invert_string (s);
}
-Alignment * complement_aln ( Alignment *A)\r
-{\r
- char *buf;\r
- int l, a, b, c;\r
- \r
- for ( a=0; a< A->nseq; a++)\r
- {\r
- A->seq_al[a]=complement_string (A->seq_al[a]);\r
- }\r
-\r
- return A;\r
-}\r
-\r
-Alignment * extract_nol_local_aln(Alignment *A, int start, int max_end)\r
- {\r
- A=extract_aln ( A, start, max_end);\r
- A=trunkate_local_aln (A);\r
- return A;\r
- }\r
-\r
-Alignment * alnpos_list2block (Alignment *A, int n, char **in_list)\r
-{\r
- int *pos;\r
- int a;\r
- char **list;\r
- int list_declared=0;\r
- Alignment *B;\r
- \r
- if (check_file_exists (in_list[0]))\r
- {\r
- int mn;\r
- char ***tmp_list;\r
- \r
- mn=count_n_line_in_file (in_list[0]);\r
- list=declare_char (mn, 100);\r
- list_declared=1;\r
- tmp_list=file2list (in_list[0], " ");\r
- a=0;\r
- n=0;\r
- while (tmp_list[a])\r
- {\r
- if (tmp_list[a][1][0]!='!')\r
- {\r
- sprintf (list[n++], "%s", tmp_list[a][1]);\r
- }\r
- a++;\r
- }\r
- free_arrayN ((void **)tmp_list, 3);\r
- }\r
- else\r
- {\r
- list=in_list;\r
- }\r
- \r
- \r
- pos=vcalloc (A->len_aln, sizeof (int));\r
- for (a=0; a<n; a++)\r
- {\r
- \r
- if (strstr (list[a], "-"))\r
- {\r
- int start, end, x;\r
- x=sscanf (list[a], "%d-%d", &start, &end);\r
- if (x!=2 || !A || start<=0 || start>=end || end>A->len_aln+1)\r
- {\r
- add_warning ( stderr, "\nWARNING: Illegal coordinates in extract_pos_list [%s]", list[a]);\r
- return A;\r
- }\r
- start--; end--;\r
- for (a=start; a<end; a++)pos[a]=1;\r
- }\r
- else\r
- {\r
- int p;\r
- p=atoi (list[a]);\r
- if (p<1 || p>A->len_aln)\r
- {\r
- add_warning ( stderr, "\nWARNING: Illegal coordinates in extract_pos_list [%s]", list[a]);\r
- }\r
- p--;\r
- pos[p]=1;\r
- }\r
- } \r
- B=alnpos2block(A, pos, NULL);\r
- vfree (pos);\r
- if ( list_declared)free_char (list, -1);\r
- \r
- return B;\r
-}\r
-Alignment * aln2block (Alignment *A, int start, int end, Alignment *B)\r
-{\r
- if ( !A || start<=0 || start>=end || end>A->len_aln+1)\r
- {\r
- add_warning ( stderr, "\nWARNING: Illegal coordinates in extract_block start=%d end=%d len=%d [Note : [start-end[, with [1...n]", start, end, A->len_aln);\r
- return A;\r
- }\r
- else\r
- {\r
- int *pos, p;\r
- start--;\r
- end--;\r
- pos=vcalloc (A->len_aln, sizeof (int));\r
- for (p=start;p<end;p++)\r
- {\r
- pos[p]=1;\r
- }\r
- B=alnpos2block (A, pos, B);\r
- vfree (pos);\r
- return B;\r
- }\r
-}\r
-Alignment * alnpos2block (Alignment *A, int *pos, Alignment *B)\r
-{\r
- \r
- //extract a subset of B without over-writing A\r
- int a, b;\r
- \r
- B=copy_aln (A, B);\r
- B->len_aln=0;\r
- for (a=0; a<=A->len_aln; a++)\r
- {\r
- if ( pos[a]!=0 || a==A->len_aln)\r
- {\r
- for ( b=0; b<A->nseq; b++)\r
- B->seq_al[b][B->len_aln]=A->seq_al[b][a];\r
- if ( a!=A->len_aln)B->len_aln++;\r
- }\r
- }\r
- \r
- return B;\r
-}\r
-Alignment * extract_aln ( Alignment *A, int start, int end)\r
-{\r
- return extract_aln2 ( A, start, end, "cons");\r
-}\r
-\r
-Alignment * extract_aln2 ( Alignment *A, int in_start, int in_end, char *seq)\r
- {\r
- char *tmp;\r
- FILE *fp;\r
- \r
-\r
- tmp=vtmpnam (NULL);\r
- fp=vfopen (tmp, "w");\r
- fprintf ( fp, "%s %d %d\n", seq, in_start, in_end);\r
- vfclose (fp);\r
- return extract_aln3 (A,tmp);\r
- }\r
-Alignment * extract_aln3 ( Alignment *B, char *file)\r
- {\r
- int a, b, c;\r
- int start, end;\r
- int n, i, s, nline=0;\r
- FILE *fp;\r
- Alignment *A=NULL;\r
- int *col;\r
- char name[MAXNAMES];\r
- char line[VERY_LONG_STRING];\r
- int *offset;\r
- \r
- /*Reads in a file\r
- #comment\r
- ! seq_name offset\r
- seqname pos\r
- OR\r
- seqname start end[\r
- modifies the incoming alignment\r
- */\r
- \r
- offset=vcalloc ( B->nseq+1, sizeof (int));\r
- fp=vfopen (file,"r");\r
- while ( (c=fgetc(fp))!=EOF)\r
- {\r
- s=-1;\r
- fgets ( line, VERY_LONG_STRING,fp);\r
- if ( c=='!')\r
- {\r
- sscanf (line, "%s %d", name, &start);\r
- s=name_is_in_list (name,B->name,B->nseq,MAXNAMES);\r
- }\r
- if (s!=-1)\r
- offset[s]=start;\r
- }\r
- \r
- vfclose (fp);\r
- \r
- A=copy_aln (B, A); \r
- col=vcalloc ( A->len_aln, sizeof (int));\r
- \r
- fp=vfopen ( file, "r");\r
- while ( (c=fgetc(fp))!=EOF)\r
- {\r
- nline++;\r
- if ( c=='#' || c=='!')fgets ( line, VERY_LONG_STRING,fp);\r
- else\r
- {\r
- ungetc(c, fp);\r
- fgets ( line, VERY_LONG_STRING,fp);\r
- \r
- if (sscanf (line, "%s %d %d", name, &start, &end)==3);\r
- else if (sscanf (line, "%s %d", name, &start)==2)\r
- {\r
- end=start+1;\r
- }\r
- else\r
- {\r
- add_warning ( stderr, "\nWARNING: wrong format in coordinate file (line=%d)\n", nline);\r
- continue;\r
- }\r
- if ( end==0)end=A->len_aln+1;\r
-\r
- s=name_is_in_list (name,A->name,A->nseq,MAXNAMES);\r
- \r
- \r
- if ( s==-1 && !strm (name, "cons"))\r
- {\r
- add_warning ( stderr, "\nWARNING: Seq %s does not belong to the alignment (line %d)\n", name,nline);\r
- continue;\r
- }\r
- else if ( start>end)\r
- {\r
- add_warning ( stderr, "\nWARNING: Illegal coordinates [%s %d %d] (line %d)\n", name,start, end,nline);\r
- continue;\r
- }\r
- else\r
- {\r
- int done=0;\r
- if ( s!=-1)\r
- { \r
- start-=offset[s]-1;\r
- end-=offset[s]-1;\r
- }\r
- for (n=0, a=0; done!=1 && a< A->len_aln; a++)\r
- {\r
- i=(strm (name, "cons"))?1:!is_gap(A->seq_al[s][a]);\r
- \r
- n+=i;\r
- if (n>=start && n<end)\r
- {\r
- col[a]=1;\r
- }\r
- if (n>=end)done=1;\r
- //if (n>=start && n<end && !(i==0 && n==end-1))\r
- //{\r
- // col[a]=1;\r
- //}\r
- //else if ( n>=end)a=A->len_aln;\r
- }\r
- if ( done==0)\r
- {\r
- HERE ("Warning Missing positions in File %s",file );\r
- }\r
- }\r
- }\r
- }\r
- vfclose ( fp);\r
- \r
-\r
- \r
- /*Extract [start-end[*/\r
- for ( b=0,a=0; a< A->len_aln; a++)\r
- {\r
- if ( col[a])\r
- {\r
- for (c=0; c< A->nseq; c++)A->seq_al[c][b]=A->seq_al[c][a];\r
- b++;\r
- }\r
- }\r
- A->len_aln=b;\r
-\r
- for (c=0; c< A->nseq; c++)A->seq_al[c][A->len_aln]='\0';\r
- vfree (col);\r
- \r
- return A;\r
-\r
- }\r
-Alignment * trunkate_local_aln ( Alignment *A)\r
- {\r
- int a, b;\r
- int **pos;\r
- int **cache;\r
- int seq;\r
- \r
- \r
- cache=declare_int (return_max_int (A->order,read_size_int ( A->order,sizeof (int*)),0)+1,return_max_int (A->order,read_size_int ( A->order,sizeof (int*)),1)+A->len_aln+1); \r
- pos=aln2pos_simple(A,A->nseq);\r
- \r
- for ( b=0; b<A->len_aln; b++)\r
- for ( a=0; a< A->nseq; a++) \r
- {\r
- seq=A->order[a][0];\r
- if ( pos[a][b]<=0);\r
- else if ( pos[a][b]>0)\r
- {\r
- \r
- if (cache[seq][pos[a][b]]==0)cache[seq][pos[a][b]]++;\r
- else if ( cache[seq][pos[a][b]]>=1)\r
- { \r
- cache[seq][pos[a][b]]++;\r
- A->seq_al[a][b]='\0';\r
- }\r
- }\r
- }\r
- \r
- A->len_aln=get_shortest_string ( A->seq_al, A->nseq, NULL, NULL);\r
- pad_string_array ( A->seq_al, A->nseq, A->len_aln, '-');\r
- \r
- free_int (pos, -1);\r
- free_int ( cache,-1);\r
- \r
- \r
- return A;\r
- }\r
-\r
-int get_nol_aln_border ( Alignment *A, int start, int direction)\r
- {\r
- int a, b;\r
- int **pos;\r
- int **cache;\r
- int seq,end;\r
- \r
- /*This Function Returns the limit position for a non overlaping alignment*/\r
- \r
- cache=declare_int (return_max_int (A->order,read_size_int ( A->order,sizeof (int*)),0)+1,return_max_int (A->order,read_size_int ( A->order,sizeof (int)),1)+A->len_aln+1);\r
- pos=aln2pos_simple(A,A->nseq);\r
- end=(direction==GO_RIGHT)?A->len_aln:-1;\r
- \r
-\r
- for ( b=start; b!=end;b+=direction)\r
- for ( a=0; a< A->nseq; a++) \r
- {\r
- seq=A->order[a][0];\r
- if ( pos[a][b]<=0);\r
- else if ( pos[a][b]>0)\r
- {\r
- \r
- if (cache[seq][pos[a][b]]==0)cache[seq][pos[a][b]]++;\r
- else if ( cache[seq][pos[a][b]]>=1)\r
- { \r
- cache[seq][pos[a][b]]++;\r
- free_int(cache, -1);\r
- return b-direction;\r
- }\r
- }\r
- }\r
- \r
- free_int ( cache,-1);\r
- free_int (pos, -1);\r
- return end-direction;\r
- }\r
-\r
-\r
-\r
-\r
-\r
-char * extract_defined_seq ( char *in, int in_of, int in_start, int *aa_def, int dir, int *out_start, char *out)\r
- {\r
- int start=0, end,l;\r
- int b, c, d;\r
-\r
- \r
-\r
- if ( dir==GO_LEFT){start=in_start-1;}\r
- else if ( dir==GO_RIGHT){start=in_start+1;} \r
- \r
- end=start;\r
- while (aa_def[end]!=UNDEFINED)\r
- {\r
- end+=dir;\r
- }\r
- end-=dir;\r
- \r
- if (end<start)SWAP(end,start);\r
- \r
- l=strlen ( in);\r
- out_start[0]=-1;\r
- for (b=0,d=0,c=in_of;b<l; b++)\r
- {\r
- c+=1-is_gap(in[b]);\r
- if ( c>=start && c<=end)\r
- {\r
- if ( out_start[0]==-1)out_start[0]=c-!is_gap(in[b]);\r
- out[d++]=in[b];\r
- }\r
- }\r
- out[d]='\0';\r
- \r
- \r
- return out;\r
- }\r
-Alignment * concatenate_aln ( Alignment *A1, Alignment *A2, char *spacer)\r
-{\r
- Alignment *A;\r
- int a, i;\r
- \r
- A=declare_aln2( A1->nseq+A2->nseq , A1->len_aln+A2->len_aln+1);\r
- for ( a=0; a< A1->nseq; a++)\r
- {\r
- if ((i=name_is_in_list ( A1->name[a], A2->name, A2->nseq, 100))!=-1)\r
- {\r
- sprintf ( A->name[A->nseq], "%s", A1->name[a]);\r
- sprintf (A->seq_al[A->nseq], "%s%s%s", A1->seq_al[a],(spacer)?spacer:"", A2->seq_al[i]);\r
- A->nseq++;\r
- }\r
- else \r
- {\r
- char *buf;\r
- buf=generate_string (A2->len_aln, '-');\r
- sprintf ( A->name[A->nseq], "%s", A1->name[a]);\r
- sprintf (A->seq_al[A->nseq], "%s%s", A1->seq_al[a], buf);\r
- A->nseq++;\r
- vfree (buf);\r
- }\r
- }\r
- for ( a=0; a< A2->nseq; a++)\r
- {\r
- if ((i=name_is_in_list ( A2->name[a], A1->name, A1->nseq, 100))==-1)\r
- {\r
- char *buf;\r
- buf=generate_string (A1->len_aln, '-');\r
- sprintf ( A->name[A->nseq], "%s", A2->name[a]);\r
- sprintf (A->seq_al[A->nseq], "%s%s", buf, A2->seq_al[a]);\r
- A->nseq++;\r
- vfree (buf);\r
- }\r
- }\r
- A->len_aln=A1->len_aln+A2->len_aln;\r
- return A;\r
-}\r
-Alignment * aln_cat ( Alignment *A, Alignment *B)\r
- { \r
- int a;\r
- \r
- if ( A->nseq!=B->nseq) \r
- {\r
- fprintf ( stderr, "\nERROR IN ALN CAT: DIFFERENT NSEQ\n");\r
- myexit(EXIT_FAILURE);\r
- }\r
-\r
- A=realloc_alignment2(A, A->nseq,A->len_aln+B->len_aln+1);\r
- \r
- for ( a=0;a< A->nseq; a++)\r
- { \r
- strcat ( A->seq_al[a], B->seq_al[a]);\r
- }\r
- A->len_aln+=B->len_aln;\r
- return A;\r
- }\r
-int verify_aln ( Alignment *A, Sequence *S, char *message)\r
- {\r
- int a, b, c,s,r;\r
-\r
-\r
- for ( a=0;a< A->nseq; a++)\r
- {\r
- s=A->order[a][0];\r
- r=A->order[a][1];\r
- for ( b=0, c=0; b< A->len_aln; b++)\r
- {\r
- if ( !is_gap(A->seq_al[a][b]))\r
- {\r
- if (tolower(A->seq_al[a][b])!=tolower(S->seq[s][c+r]))\r
- {\r
- fprintf ( stderr, "\n%s\nResidue [%c %d, %c %d] line %d seq %d",message,A->seq_al[a][b], b,S->seq[s][c+r], c+r,a,s); \r
- output_Alignment_with_res_number(A, stderr);\r
- myexit(EXIT_FAILURE);\r
- return 0;\r
- }\r
- c++;\r
- }\r
- }\r
- }\r
- return 1;\r
- }\r
-\r
-Alignment *adjust_est_aln ( Alignment *PW, Alignment *M, int s)\r
-{\r
- /*This function reajusts M, threading M onto PW\r
- two seqences in PW\r
- s+1 seq in M\r
- \r
- seq 0 PW ----> 0->s-1 in M\r
- seq 1 PW ----> 1->s in M;\r
- \r
- */\r
- int a, b;\r
- static char **array;\r
-\r
- \r
- int top_M=0;\r
- int bottom_M=0;\r
- \r
- \r
- if ( array==NULL)\r
- {\r
- array=declare_char (500, 100000);\r
- }\r
-\r
- for ( a=0; a< PW->len_aln; a++)\r
- {\r
- if ( is_gap(PW->seq_al[0][a]))\r
- {\r
- for ( b=0; b< s; b++)\r
- array[b][a]='-';\r
- }\r
- else\r
- {\r
- for ( b=0; b< s; b++)\r
- array[b][a]=M->seq_al[b][top_M];\r
- top_M++;\r
- }\r
- \r
- if ( is_gap(PW->seq_al[1][a]))\r
- {\r
- array[s][a]='-';\r
- }\r
- else\r
- {\r
- \r
- array[s][a]=M->seq_al[s][bottom_M];\r
- bottom_M++;\r
- } \r
- }\r
- \r
- M->len_aln=PW->len_aln;\r
- for (a=0; a<s; a++)\r
- {\r
- for (b=0; b<PW->len_aln; b++)\r
- M->seq_al[a][b]=array[a][b];\r
- M->seq_al[a][b]='\0';\r
- }\r
-\r
-\r
- M->nseq=s+1;\r
- \r
- return M;\r
-}\r
-\r
-\r
-Alignment * rename_seq_in_aln (Alignment *A, char ***list)\r
-{\r
- int n, i;\r
- if ( !A)return A;\r
- \r
-\r
- \r
- n=0;\r
- while ( list[n][0][0])\r
- {\r
- if ( (i=name_is_in_list (list[n][0], A->name, A->nseq, 100))!=-1)\r
- {\r
- sprintf ( A->name[i], "%s", list[n][1]);\r
- }\r
- n++;\r
- }\r
- \r
- A->S=rename_seq_in_seq (A->S, list);\r
- return A;\r
-}\r
-Sequence * rename_seq_in_seq (Sequence *A, char ***list)\r
-{\r
- int n, i;\r
- if ( !A || !list)return A;\r
- \r
- n=0;\r
- while ( list[n][0][0])\r
- {\r
- if ( (i=name_is_in_list (list[n][0], A->name, A->nseq, 100))!=-1)\r
- {\r
- sprintf ( A->name[i], "%s", list[n][1]);\r
- }\r
- n++;\r
- }\r
- return A;\r
-}\r
-/********************************************************************/\r
-/* */\r
-/* FLOAT SIMILARITIES */\r
-/* */\r
-/* */\r
-/* */\r
-/********************************************************************/\r
-float get_seq_fsim ( char *string1, char *string2, char *ignore, char *similarity_set,int **matrix, int MODE )\r
- {\r
- int len, a, r1, r2, nr1=0, nr2=0;\r
- float pos=0, sim=0;\r
- \r
-\r
- len=MIN((strlen (string1)),(strlen (string2)));\r
- if ( len==0)return 0;\r
- \r
- for ( a=0; a< len; a++)\r
- {\r
- \r
- r1=string1[a];\r
- r2=string2[a];\r
- nr1+=!is_gap(r1);\r
- nr2+=!is_gap(r2);\r
- \r
- if ( !is_in_set (r1, ignore) && !is_in_set (r2, ignore))\r
- {\r
- pos++;\r
- if ( matrix)sim+=matrix[r1-'A'][r2-'A'];\r
- else if (is_in_same_group_aa(r1,r2,0, NULL,similarity_set))\r
- {\r
- sim++;\r
- }\r
- }\r
- }\r
- if ( MODE==UNGAPED_POSITIONS)return ( sim*100)/pos;\r
- else if ( MODE==ALIGNED_POSITIONS)return (sim*100)/len;\r
- else if ( MODE==AVERAGE_POSITIONS)return (sim*200)/(nr1+nr2);\r
- else\r
- {\r
- return 0;\r
- }\r
- \r
- }\r
-float get_seq_fsim2 ( char *string1, char *string2, char *ignore, char *in_mode)\r
- {\r
- int len1;\r
- int a;\r
- int p1, p2;\r
- int r1=0,r2=0;\r
- char *p;\r
- char mode[1000];\r
- float r=0, pos1, pos2, pos0, gap, sim;\r
- \r
-\r
- sprintf ( mode, "%s", in_mode);\r
- \r
- /*mode: <mat>__<sim_mode>\r
- mat: idscore to get the alignment done\r
- any legal cw matrix\r
- sim_mode: sim1->identities/matches\r
- sim2->identities/min len \r
- */\r
-\r
- \r
- if ( (p=strstr (mode, "_"))!=NULL)\r
- {\r
- p[0]='\0';\r
- p++;\r
- }\r
-\r
- \r
- if (strstr (mode, "idscore"))\r
- {\r
- static int **mat;\r
- if (!mat) mat=read_matrice ("blosum62mt");\r
- return idscore_pairseq (string1, string2, -12, -1, mat,mode);\r
- \r
- }\r
- \r
- len1=strlen (string1);\r
- for ( sim=pos1=pos2=pos0=gap=0,a=0; a< len1; a++)\r
- {\r
- r1=string1[a];\r
- r2=string2[a];\r
- p1=1-is_in_set (r1, ignore);\r
- p2=1-is_in_set (r2, ignore);\r
- pos1+=p1; pos2+=p2;\r
- if (p1 && p2)\r
- {\r
- pos0++;\r
- if (is_in_same_group_aa(r1,r2,0, NULL, mode))\r
- { \r
- sim++;\r
- }\r
- }\r
- else if (p1+p2==1)\r
- {\r
- gap++;\r
- }\r
- }\r
-\r
- if ( p==NULL || strm (p, "sim1") || strm (p, "sim"))\r
- {\r
- r=(pos0==0)?0:(sim*MAXID)/pos0;\r
- }\r
- else if ( strm (p, "sim2"))\r
- {\r
- r=(pos1==0 || pos2==0)?0:(sim*MAXID)/MIN(pos1,pos2);\r
- }\r
- else if ( strm (p, "sim3"))\r
- {\r
- r=(pos1==0 || pos2==0)?0:(sim*MAXID)/MAX(pos1,pos2);\r
- }\r
- else if ( strm (p, "gap1"))\r
- {\r
- r=(len1==0)?MAXID:(gap*MAXID)/len1;\r
- r=MAXID-r;\r
- }\r
- else if ( strm (p, "logid"))\r
- {\r
- r=logid_score (pos0, sim);\r
- }\r
-\r
- return r;\r
- \r
- } \r
-\r
-/********************************************************************/\r
-/* */\r
-/* ALIGNMENT ANALYSES */\r
-/* */\r
-/* */\r
-/* */\r
-/********************************************************************/\r
-int **dist_array2sim_array ( int **p, int max)\r
-{\r
- int s1, s2, a, b;\r
- s1=read_array_size ((void *)p, sizeof (void *));\r
- s2=read_array_size ((void*)p[0],sizeof (int));\r
- /* s2=read_array_size ((void*)p[0],sizeof (void *)); OLD before 64 BITS*/\r
- for ( a=0; a< s1; a++)\r
- for ( b=0; b< s2; b++)\r
- {\r
- p[a][b]=max-p[a][b];\r
- } \r
- return p;\r
-}\r
-\r
-int **sim_array2dist_array ( int **p, int max)\r
-{\r
- int s1, s2, a, b;\r
- s1=read_array_size ((void *)p, sizeof (void *));\r
- s2=read_array_size ((void*)p[0],sizeof (int));\r
-\r
- /*s2=read_array_size ((void*)p[0],sizeof (void *)); OLD before 64 Bits stuff*/ \r
- for ( a=0; a< s1; a++)\r
- for ( b=0; b< s2; b++)\r
- {\r
- p[a][b]=max-(int)p[a][b];\r
- } \r
- return p;\r
-}\r
-\r
-int **normalize_array (int **p, int max, int norm)\r
-{\r
-int s1, s2, a, b;\r
- s1=read_array_size ((void *)p, sizeof (void *));\r
- s2=read_array_size ((void*)p[0],sizeof (int));\r
- \r
- /*s2=read_array_size ((void*)p[0],sizeof (void *)); OLD before 64 Bits stuff*/ \r
- for ( a=0; a< s1; a++)\r
- for ( b=0; b< s2; b++)\r
- {\r
- p[a][b]=(p[a][b]*norm)/max;\r
- } \r
- return p;\r
-}\r
-\r
-int aln2most_similar_sequence ( Alignment *A, char *mode)\r
-{\r
- int **w;\r
- int a, b;\r
- int avg, best_avg=0, best_seq=0;\r
- char *buf;\r
- int coverage;\r
-\r
- \r
- if ( !A) return -1;\r
- else if ( A->nseq==1)return 0;\r
- else\r
- {\r
- buf=vcalloc ( A->len_aln+1, sizeof (char));\r
- w=get_sim_aln_array ( A, mode);\r
- \r
- for ( a=0; a< A->nseq; a++)\r
- {\r
- sprintf ( buf, "%s", A->seq_al[a]);\r
- ungap(buf);\r
- coverage=(strlen(buf)*MAXID)/A->len_aln;\r
- \r
- for ( avg=0,b=0; b< A->nseq; b++)avg+=w[a][b]*coverage;\r
- if ( avg>best_avg){best_avg=avg; best_seq=a;}\r
- }\r
- free_int (w, -1);\r
- vfree (buf);\r
- return best_seq;\r
- }\r
- \r
-}\r
-\r
-\r
-\r
-int aln2coverage ( Alignment *A, int ref_seq)\r
-{\r
- int a,b;\r
- int cov_pos=0, npos=0;\r
-\r
- for ( a=0; a< A->len_aln; a++)\r
- {\r
- if ( !is_gap ( A->seq_al[ref_seq][a]))\r
- {\r
- npos++;\r
- for ( b=0; b< A->nseq; b++)\r
- {\r
- if ( b!=ref_seq && !is_gap ( A->seq_al[b][a])){cov_pos++;break;}\r
- }\r
- }\r
- }\r
- return (int) (npos==0)?0:(( MAXID*cov_pos)/A->len_aln);\r
-}\r
- \r
-\r
-int sub_aln2sim ( Alignment *A, int *ns, int **ls, char *mode)\r
-{\r
- int a, b, n;\r
- float avg;\r
-\r
- n=0; avg=0;\r
- if (!A || (ns==NULL && A->nseq<2))return -1;\r
- else if (ns==NULL)\r
- {\r
- for (a=0; a< A->nseq-1; a++)\r
- for ( b=a+1; b< A->nseq;b++, n++)\r
- avg+=generic_get_seq_sim (A->seq_al[a], A->seq_al[b], NULL, mode);\r
- }\r
- else\r
- {\r
- for (a=0; a<ns[0]; a++)\r
- for (b=0; b< ns[1]; b++, n++)\r
- {\r
- avg+=generic_get_seq_sim (A->seq_al[ls[0][a]], A->seq_al[ls[1][b]], NULL, mode);\r
- }\r
- }\r
- return (int)(n==0)?0:((float)avg/(float)n);\r
-}\r
-int sub_aln2max_sim ( Alignment *A, int *ns, int **ls, char *mode)\r
-{\r
- int a, b, n;\r
- float avg;\r
-\r
- n=0; avg=0;\r
- if (!A || (ns==NULL && A->nseq<2))return -1;\r
- else if (ns==NULL)\r
- {\r
- for (a=0; a< A->nseq-1; a++)\r
- for ( b=a+1; b< A->nseq;b++, n++)\r
- avg=MAX(avg,generic_get_seq_sim (A->seq_al[a], A->seq_al[b], NULL, mode));\r
- }\r
- else\r
- {\r
- for (a=0; a<ns[0]; a++)\r
- for (b=0; b< ns[1]; b++, n++)\r
- {\r
- avg=MAX(avg,generic_get_seq_sim (A->seq_al[ls[0][a]], A->seq_al[ls[1][b]], NULL, mode));\r
- }\r
- }\r
- return avg;\r
-} \r
-\r
-\r
-double aln2entropy (Alignment *A, int *in_ls, int in_ns, float gap_threshold)\r
-{\r
- int ns, a, s, col, r,ncol;\r
- int *ls;\r
- double *count;\r
- double entropy=0;\r
- float ng;\r
- \r
- ls=vcalloc ( A->nseq, sizeof (int));\r
- count=vcalloc ( 26, sizeof (double));\r
- \r
- \r
- if ( in_ls)\r
- {\r
- ns=in_ns;\r
- for ( a=0; a< ns; a++)ls[a]=in_ls[a];\r
- }\r
- else \r
- {\r
- ns=A->nseq;\r
- for ( a=0; a< ns; a++)ls[a]=a;\r
- }\r
- \r
- if ( ns==0)\r
- {\r
- vfree(ls);vfree(count);return 0;\r
- }\r
- for (ncol=0,col=0; col<A->len_aln; col++)\r
- {\r
- for (ng=0,a=0; a< ns; a++) \r
- {\r
- s=ls[a];\r
- ng+=is_gap(A->seq_al[s][col]);\r
- }\r
- ng/=ns;\r
- if ( ng>gap_threshold)continue;\r
- \r
- ncol++;\r
- \r
- for ( a=0; a<ns; a++)\r
- {\r
- s=ls[a];\r
- r=tolower(A->seq_al[s][col]);\r
- if (!is_gap(r))count[r-'a']++;\r
- }\r
- for (a=0; a<26; a++)\r
- {\r
- if ( count[a]==0);\r
- else \r
- {\r
- count[a]/=(double)ns;\r
- \r
- entropy+=count[a]*log(count[a]);\r
- count[a]=0;\r
- }\r
- }\r
- }\r
- entropy/=-ncol;\r
- vfree (ls); vfree(count);\r
- \r
- return entropy;\r
-}\r
-int aln2sim ( Alignment *A, char *mode)\r
-{\r
- return sub_aln2sim ( A, NULL, NULL, mode);\r
- /*\r
- if ( !A || A->nseq<2) return -1;\r
- w=get_sim_aln_array ( A, mode);\r
-\r
- for (c=0, a=0; a< A->nseq-1; a++)\r
- for ( b=a+1; b< A->nseq; b++, c++)\r
- {\r
- avg+=(float)w[a][b];\r
- }\r
- free_int (w, -1);\r
- return (int)((float)avg/(float)c);\r
- */\r
-}\r
-\r
-int aln_is_aligned ( Alignment *A)\r
-{\r
- int a, b;\r
- \r
- if ( !A)return 0;\r
- for (a=0; a< A->nseq; a++)\r
- for ( b=A->len_aln-1; b>0; b--)\r
- {\r
- if (!is_gap(A->seq_al[a][b]) && is_gap(A->seq_al[a][b-1]))return 1;\r
- }\r
- return 0;\r
-}\r
- \r
-\r
-int seq2aln2sim_old ( char *seq1, char *seq2, char *mode_aln, char *mode_id)\r
-{\r
- Alignment *A;\r
- int sim;\r
-\r
- A=align_two_sequences (seq1, seq2, "pam250mt", -10, -1, mode_aln);\r
- sim=aln2sim (A, mode_id);\r
- free_aln (A);\r
- return sim;\r
-}\r
-int seq2aln2sim ( char *seq1, char *seq2, char *mode_aln, char *mode_id)\r
-{\r
- Alignment *A;\r
- int sim;\r
- static int gop;\r
- \r
- if (!gop)\r
- {\r
- int **m;\r
- m=read_matrice ("blosum62mt");\r
- gop=get_avg_matrix_mm(m, AA_ALPHABET)*10;\r
- free_int (m, -1);\r
- }\r
- \r
- A=align_two_sequences (seq1, seq2, "blosum62mt",gop,-1, mode_aln);\r
- sim=aln2sim (A, mode_id);\r
- free_aln (A);\r
- return sim;\r
-}\r
-int* get_cdna_seq_winsim ( int *cache, char *string1, char *string2, char *ignore, char *mode,int *w )\r
- {\r
- int len1, len2;\r
- int a, x;\r
-\r
- \r
- len1=strlen (string1);\r
- len2=strlen (string2);\r
- \r
- if ( len1!=len2)\r
- {\r
- fatal_exit( stderr,EXIT_FAILURE, "\nTHE TWO cDNAs DO NOT HAVE THE SAME LENGTH [FATAL:get_cdna_seq_sim:%s", PROGRAM);\r
- }\r
- \r
- x=get_cdna_seq_sim(cache, string1, string2, ignore, "");\r
- for ( a=0; a< len1; a++)\r
- w[a]=x;\r
-\r
- add_warning (stderr, "\nWARNING: winsim not implemented for cDNA");\r
- return w;\r
- }\r
-\r
-int get_cdna_seq_sim ( int *cache, char *string1, char *string2, char *ignore, char *mode)\r
- {\r
- int len1;\r
- int len2;\r
- int a;\r
- int pos=0;\r
- int sim=0;\r
- char r1=0, r2=0;\r
- \r
- len1=strlen (string1);\r
- len2=strlen (string2);\r
-\r
-\r
- \r
- if ( len1!=len2)\r
- {\r
- fprintf ( stderr, "\nTHE TWO cDNAs DO NOT HAVE THE SAME LENGTH [FATAL:get_cdna_seq_sim:%s", PROGRAM);\r
- crash("");\r
- }\r
- \r
- for ( a=0; a< len1;)\r
- {\r
- \r
- if ( cache[a]==0){a++;continue;}\r
- else if ( cache[a]==1)\r
- {\r
- \r
- r1=translate_dna_codon (string1+a, 'x');\r
- r2=translate_dna_codon (string2+a, 'x');\r
- a+=3;\r
- }\r
- \r
- if ( !is_in_set (r1, ignore) && !is_in_set (r2, ignore))\r
- {\r
- pos++;\r
- if (is_in_same_group_aa(r1,r2,0, NULL,mode+4))\r
- {\r
- sim++;\r
- }\r
- }\r
- }\r
-\r
-\r
-\r
- if (pos==0)\r
- return 0;\r
- else \r
- return (int) (sim*MAXID)/pos;\r
- \r
- } \r
-\r
-int* get_seq_winsim ( char *string1, char *string2, char *ignore, char *mode, int*w)\r
- {\r
- int len1, len2, len;\r
- int left, right;\r
- int a,b;\r
- int sim=0;\r
- int window;\r
- int r1, r2;\r
-\r
- len1=strlen (string1);\r
- len2=strlen (string2);\r
- window=atoi(mode);\r
- len=2*window+1;\r
- \r
- if ( len1!=len2)return 0;\r
- if (window==0 || (window*2+1)>=len1)\r
- {\r
- sim=get_seq_sim (string1, string2, ignore, "");\r
- for (a=0; a<len1; a++)w[a]=sim;\r
- return w;\r
- }\r
- \r
-\r
- for ( a=0; a< len1; a++)\r
- {\r
- \r
- left =MAX(0, a-window);\r
- right=MIN(len1, left+len);\r
- for (sim=0,b=left; b<right; b++)\r
- {\r
- r1=string1[b];\r
- r2=string2[b];\r
- if ( !is_in_set (r1, ignore) && !is_in_set (r2, ignore))\r
- {\r
- if (r1==r2)sim++;\r
- }\r
- }\r
- w[a]=(sim*MAXID)/len;\r
- }\r
- return w;\r
- }\r
-\r
-\r
-int get_seq_sim ( char *string1, char *string2, char *ignore, char *in_mode)\r
- {\r
- int len1;\r
- int a;\r
- int pos1, pos2, pos0,gap=0, sim;\r
- int p1, p2;\r
- int r=0,r1=0,r2=0;\r
- char *p;\r
- static char *mode;\r
- \r
- if (!mode)mode=vcalloc (100, sizeof (char));\r
- else mode[0]='\0';\r
- if (in_mode)\r
- {\r
- while (in_mode[0]=='_')in_mode++;\r
- sprintf ( mode, "%s", in_mode);\r
- }\r
- \r
- /*mode: <mat>__<sim_mode>\r
- mat: idscore to get the alignment done\r
- any legal cw matrix\r
- sim_mode: sim1->identities/matches\r
- sim2->identities/min len \r
- */\r
-\r
- \r
- if ( (p=strstr (mode, "_"))!=NULL)\r
- {\r
- p[0]='\0';\r
- p++;\r
- }\r
-\r
- \r
- if (strstr (mode, "idscore"))\r
- {\r
- static int **mat;\r
- if (!mat) mat=read_matrice ("blosum62mt");\r
- return idscore_pairseq (string1, string2, -12, -1, mat,mode);\r
- \r
- }\r
- len1=strlen (string1);\r
- for ( sim=pos1=pos2=pos0=0,a=0; a< len1; a++)\r
- {\r
- r1=string1[a];\r
- r2=string2[a];\r
- p1=1-is_in_set (r1, ignore);\r
- p2=1-is_in_set (r2, ignore);\r
- \r
- pos1+=p1; pos2+=p2;\r
- if (p1 && p2)\r
- {\r
- pos0++;\r
- if (is_in_same_group_aa(r1,r2,0, NULL, mode))\r
- { \r
- sim++;\r
- }\r
- }\r
- else if (p1+p2==1)\r
- {\r
- gap++;\r
- }\r
- }\r
- \r
- if ( strstr (mode, "cov"))\r
- {\r
- r=(pos0+gap==0)?0:(pos0*MAXID)/(pos0+gap);\r
- }\r
- else if ( p==NULL || strm (p, "sim1") || strm (p, "sim"))\r
- {\r
- r=(pos0==0)?0:(sim*MAXID)/pos0;\r
- }\r
- else if ( strm (p, "sim2"))\r
- {\r
- r=(pos1==0 || pos2==0)?0:(sim*MAXID)/MIN(pos1,pos2);\r
- }\r
- else if ( strm (p, "sim3"))\r
- {\r
- r=(pos1==0 || pos2==0)?0:(sim*MAXID)/MAX(pos1,pos2);\r
- }\r
- else if ( strm (p, "gap1"))\r
- {\r
- r=(len1==0)?MAXID:(gap*MAXID)/len1;\r
- r=MAXID-r;\r
- }\r
- else if ( strm (p, "logid"))\r
- {\r
- r=logid_score (pos0, sim);\r
- }\r
- else if ( strstr (mode, "sim"))\r
- {\r
- r=(pos0==0)?0:(sim*MAXID)/pos0;\r
- }\r
-\r
- \r
- return r;\r
- \r
- } \r
-int get_seq_sim_2 ( char *string1, char *string2, char *ignore, char **gr, int ng)\r
- {\r
- int len1;\r
- int len2;\r
- int a;\r
- int pos=0;\r
- int sim=0;\r
- char r1, r2;\r
- \r
- \r
- len1=strlen (string1);\r
- len2=strlen (string2);\r
- \r
- if ( len1!=len2)return 0;\r
- \r
- for ( a=0; a< len1; a++)\r
- {\r
- r1=string1[a];\r
- r2=string2[a];\r
- if ( !is_in_set (r1, ignore) && !is_in_set (r2, ignore))\r
- {\r
- pos++;\r
- if (is_in_same_group_aa(r1,r2,ng, gr, NULL))\r
- {\r
- sim++;\r
- }\r
- }\r
- }\r
- \r
- if (pos==0)\r
- return 0;\r
- else \r
- return (int) (sim*MAXID)/pos;\r
- \r
- }\r
-\r
-int get_seq_sim_3 ( char *string1, char *string2, char *ignore, int **mat)\r
- {\r
- int len1;\r
- int len2;\r
- int a;\r
- \r
- int sim=0;\r
- char r1, r2;\r
- \r
- \r
- len1=strlen (string1);\r
- len2=strlen (string2);\r
- \r
- if ( len1!=len2)return 0;\r
- \r
- for ( a=0; a< len1; a++)\r
- {\r
- r1=string1[a];\r
- r2=string2[a];\r
- if ( !is_in_set (r1, ignore) && !is_in_set (r2, ignore))\r
- {\r
- sim+=mat[r1-'A'][r2-'A'];\r
- }\r
- }\r
- return sim;\r
- \r
- } \r
-int * get_aln_col_weight ( Alignment *A, char *mode)\r
- {\r
- int a, b;\r
- char *col;\r
- int *weight;\r
- \r
- col=vcalloc ( A->nseq, sizeof (int));\r
- weight=vcalloc (A->len_aln, sizeof (int));\r
- \r
- for (a=0; a< A->len_aln; a++)\r
- {\r
- for ( b=0; b< A->nseq; b++)\r
- col[b]=A->seq_al[b][a];\r
- weight[a]=(find_group_aa_distribution (col, A->nseq,0,NULL,NULL, mode )*MAXID)/A->nseq; \r
- }\r
- vfree (col);\r
- return weight;\r
- \r
- } \r
- \r
-int analyse_aln_column ( Alignment *B, int col)\r
- {\r
-\r
- char r=' ';\r
- int a, b, c=0;\r
- static char *mat;\r
- static int ng_cw_star;\r
- static char **cw_star;\r
- int *cw_star_count;\r
- \r
- static int ng_cw_col;\r
- static char **cw_col;\r
- int *cw_col_count;\r
- \r
- static int ng_cw_dot;\r
- static char **cw_dot;\r
- int *cw_dot_count;\r
- \r
-\r
-\r
-\r
- \r
- \r
- if ( !B->S || !(B->S)->type)B= get_aln_type (B);\r
- \r
- if ( !mat)mat=vcalloc ( STRING, sizeof (char));\r
-\r
- if ( !ng_cw_star)\r
- {\r
- cw_star=make_group_aa ( &ng_cw_star, strcpy ( mat,"idmat"));\r
- cw_col=make_group_aa ( &ng_cw_col, strcpy (mat,"clustalw_col"));\r
- cw_dot=make_group_aa ( &ng_cw_dot, strcpy (mat, "clustalw_dot"));\r
- }\r
-\r
- cw_star_count=vcalloc (ng_cw_star, sizeof (int));\r
- cw_col_count=vcalloc ( ng_cw_col, sizeof (int));\r
- cw_dot_count=vcalloc (ng_cw_dot, sizeof (int));\r
- \r
- for ( a=0; a< B->nseq; a++)\r
- {\r
- c=tolower (B->seq_al[a][col]);\r
- if (is_gap(c)){r=' ';break;}\r
- \r
- for ( b=0; b< ng_cw_star; b++)\r
- cw_star_count[b]+=is_in_set (c, cw_star[b]); \r
- for ( b=0; b< ng_cw_col; b++)\r
- cw_col_count[b]+=is_in_set (c, cw_col[b]);\r
- for ( b=0; b< ng_cw_dot; b++)\r
- cw_dot_count[b]+=is_in_set (c, cw_dot[b]);\r
- }\r
- \r
- \r
- \r
- \r
- \r
- if ( !is_gap(c) && r==' ')\r
- for ( b=0; b< ng_cw_star; b++)if ( cw_star_count[b]==B->nseq){r='*'; break;}\r
- if ( !is_gap(c) && r==' ' && !(strm((B->S)->type, "DNA")||strm ((B->S)->type,"RNA")))\r
- for ( b=0; b< ng_cw_col ; b++)if ( cw_col_count [b]==B->nseq){r=':'; break;}\r
- if ( !is_gap(c) && r==' ' && !(strm((B->S)->type, "DNA")||strm ((B->S)->type,"RNA")))\r
- for ( b=0; b< ng_cw_dot ; b++)if ( cw_dot_count [b]==B->nseq){r='.'; break;}\r
- \r
- \r
- \r
- vfree(cw_star_count);\r
- vfree(cw_col_count);\r
- vfree(cw_dot_count); \r
- \r
- return r;\r
- }\r
-\r
-\r
-int ** get_cov_aln_array ( Alignment *A, char *mode)\r
-{\r
- int **w;\r
- int a, b, c, t;\r
- \r
- w=declare_int ( A->nseq, A->nseq);\r
- \r
- \r
- for ( a=0; a< A->nseq-1; a++)\r
- {\r
- w[a][a]=100;\r
- for ( t=0,b=a+1; b< A->nseq; b++)\r
- {\r
- for ( c=0; c< A->len_aln; c++)\r
- {\r
- t+=(!is_gap(A->seq_al[a][c]) &&!is_gap(A->seq_al[b][c]));\r
- }\r
- w[a][b]=w[b][a]=(t*100)/A->len_aln;\r
- }\r
- }\r
- return w;\r
-}\r
-\r
-int ** get_cov_master_aln_array ( Alignment *A,int n, char *mode)\r
-{\r
- int **w;\r
- int b, c, t;\r
- \r
- w=declare_int ( A->nseq, A->nseq);\r
- \r
- \r
- for (b=0; b< A->nseq; b++)\r
- {\r
- \r
- for (t=0, c=0; c< A->len_aln; c++)\r
- {\r
- t+=(!is_gap(A->seq_al[n][c]) &&!is_gap(A->seq_al[n][c]));\r
- }\r
- w[n][b]=w[b][n]=(t*100)/A->len_aln;\r
- }\r
- \r
- return w;\r
-}\r
-int ** get_sim_master_aln_array ( Alignment *A,int n, char *mode)\r
- {\r
- int **w;\r
- int a;\r
- \r
- w=declare_int ( A->nseq, A->nseq);\r
- \r
- \r
- for ( a=0; a< A->nseq; a++)\r
- {\r
- if ( strm (mode, "cdna"))\r
- w[n][a]=w[a][n]=get_cdna_seq_sim ( A->cdna_cache[0], A->seq_al[a], A->seq_al[n],GAP_LIST, mode); \r
- else\r
- w[n][a]=w[a][n]=get_seq_sim ( A->seq_al[n], A->seq_al[a],GAP_LIST, mode);\r
- }\r
- return w;\r
- }\r
-int ** get_dist_aln_array ( Alignment *A, char *mode)\r
-{\r
- \r
- int **w;\r
-\r
- w=get_sim_aln_array ( A, mode);\r
- return sim_array2dist_array(w,MAXID);\r
-}\r
-Sequence * seq2filter (Sequence *Sin, int min, int max)\r
-{\r
- int *keep;\r
- char *tmpfile;\r
- Sequence *S, *Sout;\r
- int a, b, sim;\r
- int **M;\r
- FILE *fp;\r
- int n;\r
-\r
- S=duplicate_sequence (Sin);\r
- for (a=0; a<S->nseq; a++)ungap(S->seq[a]);\r
- keep=vcalloc (S->nseq, sizeof (int));\r
- M=read_matrice ("blossum62mt");\r
- for (a=0; a<S->nseq; a++)\r
- {\r
- output_completion ( stderr, a, S->nseq, 100, "Distance Matrix Computation: ");\r
- for ( b=a+1; b<S->nseq; b++)\r
- {\r
- \r
- sim=idscore_pairseq(S->seq[a], S->seq[b],-10, -2,M, "sim");\r
- if ( sim>min && sim<max)keep[a]=keep[b]=1;\r
- fprintf ( stderr, "\nSim %d Min %d Max %d", sim, min, max);\r
- }\r
- }\r
-\r
- tmpfile=vtmpnam (NULL);\r
- fp=vfopen (tmpfile, "w");\r
- for (n=0,a=0; a< S->nseq; a++)\r
- if ( keep[a]) \r
- {\r
- fprintf ( fp, ">%s %s\n%s", S->name[a], S->seq_comment[a], S->seq[a]);\r
- n++;\r
- }\r
- vfclose (fp);\r
- if (n==0) return NULL;\r
- Sout=main_read_seq(tmpfile);\r
- free_int (M, -1); vfree (keep); free_sequence (S, -1);\r
- return Sout;\r
-}\r
-\r
-Alignment * grep_seq (Alignment *S,char *field, char *mode, char *string)\r
-{\r
- int a;\r
- FILE *fp;\r
- char *tmp;\r
- int n=0;\r
- \r
- tmp=vtmpnam (NULL);\r
- fp=vfopen (tmp, "w");\r
- \r
- if ( !strm(mode, "KEEP") && ! strm (mode, "REMOVE"))\r
- {\r
- add_warning ( stderr, "\nERROR: +grep <field> <KEEP|REMOVE> <string> [FATAL: %s]", PROGRAM);\r
- myexit (EXIT_FAILURE);\r
- }\r
- else if ( !strm(field, "SEQ") && ! strm (field, "COMMENT") && ! strm(field, "NAME"))\r
- {\r
- add_warning ( stderr, "\nERROR: +grep <NAME|COMMENT|SEQ> <mode> <string> [FATAL: %s]", PROGRAM);\r
- myexit (EXIT_FAILURE);\r
- }\r
- \r
-\r
- for (n=0, a=0; a< S->nseq; a++)\r
- {\r
- int found=0;\r
-\r
- if (strm(field, "NAME") && perl_strstr (S->name[a], string))found=1;\r
- else if (strm(field, "COMMENT") && S->seq_comment[a][0] && perl_strstr (S->seq_comment[a], string) )found=1;\r
- else if (strm(field, "SEQ") && perl_strstr (S->seq_al[a], string))found=1;\r
- \r
- if ( (strm (mode, "KEEP") && found) || (strm (mode, "REMOVE") && !found))\r
- {\r
- n++;\r
- fprintf (fp, ">%s", S->name[a]);\r
- if (S->seq_comment[a][0])fprintf (fp, " %s", S->seq_comment[a]);\r
- fprintf (fp, "\n%s\n", S->seq_al[a]);\r
- }\r
- }\r
- \r
- vfclose (fp);\r
- \r
- free_aln (S);\r
- if ( n==0) return NULL;\r
- else\r
- return main_read_aln (tmp, NULL);\r
-}\r
-\r
-Alignment * modify_seq (Alignment *S, char *field, char *string1, char *string2)\r
-{\r
- int a;\r
- FILE *fp;\r
- char *tmp;\r
-\r
- tmp=vtmpnam (NULL);\r
- fp=vfopen (tmp, "w");\r
- for ( a=0; a< S->nseq; a++)\r
- {\r
- if (strm(field, "NAME"))S->name[a]=substitute ( S->name[a], string1, string2); \r
- else if (strm(field, "COMMENT"))S->seq_comment[a]=substitute ( S->seq_comment[a], string1, string2); \r
- else if (strm(field, "SEQ"))S->seq_al[a]=substitute ( S->seq_al[a], string1, string2);\r
- fprintf (fp, ">%s", S->name[a]);\r
- if (S->aln_comment[a][0])fprintf (fp, " %s", S->aln_comment[a]);\r
- fprintf (fp, "\n%s\n", S->seq_al[a]);\r
- }\r
- vfclose (fp);\r
- free_aln (S);\r
- S=main_read_aln (tmp, NULL);\r
- return S;\r
-}\r
-\r
-int ** seq2sim_mat (Sequence *S, char *mode)\r
-{\r
- return seq2comp_mat ( S,mode, "sim");\r
-}\r
-int ** seq2cov_mat (Sequence *S, char *mode)\r
-{\r
- return seq2comp_mat ( S,mode, "cov");\r
-}\r
-\r
-int ** seq2comp_mat (Sequence *S, char *mode, char *comp_mode)\r
-{\r
- int a, b;\r
- int **sim;\r
- char file[1000];\r
- Alignment *A;\r
- char *name;\r
-\r
- \r
- /*Use pre_computed value if available in the current dir*/\r
-\r
- name=path2filename(S->file[0]);\r
- sprintf ( file, "%s%s.%s.%s_file", get_cache_dir(),name, mode, comp_mode);\r
- A=seq2aln(S,NULL, RM_GAP);\r
- if ( check_file_exists (file) && is_distance_matrix_file (file) && (sim=input_similarities(file, A, NULL))!=NULL)\r
- {\r
- display_input_filename (stderr, "SIMILARITY_MATRIX", "SIMILARITY_MATRIX_FORMAT_01", file, CHECK);\r
- fprintf ( stderr, "\n");\r
- }\r
- else\r
- {\r
- char mode2[1000];\r
- int **M;\r
- \r
- M=read_matrice (mode);\r
- sim=declare_int ( S->nseq, S->nseq);\r
- for ( a=0; a< S->nseq; a++)\r
- {\r
- ungap (S->seq[a]);\r
- sim[a][a]=100;\r
- }\r
-\r
- for ( a=0; a<S->nseq-1; a++)\r
- {\r
- \r
- output_completion4halfmat ( stderr, a, S->nseq, 100, "Similarity Matrix Computation: ");\r
- for ( b=a+1; b< S->nseq; b++)\r
- {\r
- sim[a][b]=sim[b][a]=idscore_pairseq(S->seq[a], S->seq[b],-12, -1,M, comp_mode);\r
- }\r
- }\r
- free_int (M,-1);\r
- sprintf ( mode2, "_memory_%ld", (long int)sim);\r
- output_similarities( file, A, mode2);\r
- display_output_filename (stderr, "SIMILARITY_MATRIX", "SIMILARITY_MATRIX_FORMAT_01", file, CHECK);\r
- fprintf ( stderr, "\n");\r
- }\r
- free_aln (A);\r
- return sim;\r
-}\r
-\r
-int ** fast_aln2sim_list (Alignment *A, char *mode, int *ns, int **ls)\r
-{\r
- int **simm;\r
- int p1, p2, p3, r1, r2;\r
- int gap,pos0,pos1,pos2,len,sim;\r
- int a, b, c, m, s=0,s1, s2, n;\r
- int free_ns=0;\r
-\r
- if (ns==NULL)\r
- {\r
- free_ns=1;\r
- ns=vcalloc (2, sizeof (int));\r
- ns[0]=ns[1]=A->nseq;\r
- ls=declare_int (2, A->nseq);\r
- for ( a=0; a< 2; a++)\r
- for (b=0; b<A->nseq; b++)\r
- ls[a][b]=b;\r
- }\r
- \r
-\r
- simm=declare_int (ns[0]*ns[1]+1, 3);\r
- \r
- if (strstr (mode, "sim1"))m=0;\r
- else if (strstr (mode, "sim2"))m=1;\r
- else if (strstr (mode, "sim3"))m=2;\r
- else if (strstr (mode, "gap1"))m=3;\r
- else if (strstr (mode, "cov1"))m=4;\r
- else if (strstr (mode, "logid"))m=5;\r
- else m=0;\r
- \r
- \r
-\r
- for (n=0,a=0; a<ns[0]; a++)\r
- {\r
- s1=ls[0][a];\r
- for ( b=0; b<ns[1]; b++, n++)\r
- {\r
- s2=ls[1][b];\r
- gap=pos0=pos1=pos2=len=sim=0;\r
- \r
- for ( c=0; c< A->len_aln; c++)\r
- {\r
- r1=tolower (A->seq_al[s1][c]);\r
- r2=tolower (A->seq_al[s2][c]);\r
- p1=(r1!='-')?1:0;\r
- p2=(r2!='-')?1:0;\r
- p3=p1+p2;\r
- if ( p3==0)continue;\r
- if ( p3==1)gap++;\r
- if ( r1==r2)sim++;\r
- pos1+=p1;\r
- pos2+=p2;\r
- pos0+=(p3==2)?1:0;\r
- len++;\r
- }\r
-\r
- if (m==0)s=(pos0==0)?0:(sim*MAXID)/pos0; //sim1\r
- else if (m==1) s=(MIN(pos1,pos2)==0)?0:(sim*MAXID)/MIN(pos1,pos2);//sim2\r
- else if (m==2) s=(MAX(pos1,pos2)==0)?0:(sim*MAXID)/MAX(pos1,pos2);//sim3\r
- else if (m==3) s=(len==0) ?0:((len-gap)*MAXID)/len;//gap1\r
- else if (m==4) s=(len==0) ?0:((pos0)*MAXID)/len; //cov\r
- else if (m==5) \r
- {\r
- s=logid_score ( sim, len);\r
- }\r
- simm[n][0]=s1;\r
- simm[n][1]=s2;\r
- simm[n][2]=s;\r
- }\r
- }\r
-\r
- if ( free_ns) {vfree(ns); free_int (ls, -1);}\r
- simm[n][0]=-1;\r
- return simm;\r
-}\r
-\r
-int ** fast_aln2sim_mat (Alignment *A, char *mode)\r
-{\r
- int **simm;\r
- int p1, p2, p3, r1, r2;\r
- int gap,pos0,pos1,pos2,len,sim;\r
- int a, b, c, m;\r
- \r
- simm=declare_int (A->nseq, A->nseq);\r
-\r
-\r
- \r
- if (strstr (mode, "sim1"))m=0;\r
- else if (strstr (mode, "sim2"))m=1;\r
- else if (strstr (mode, "sim3"))m=2;\r
- else if (strstr (mode, "gap1"))m=3;\r
- else if (strstr (mode, "cov1"))m=4;\r
- else if (strstr (mode, "logid"))m=5;\r
- else m=0;\r
- \r
- \r
- \r
- for ( a=0; a< A->nseq-1; a++)\r
- {\r
- simm[a][a]=MAXID;\r
- for ( b=a+1; b< A->nseq; b++)\r
- {\r
- gap=pos0=pos1=pos2=len=sim=0;\r
- \r
- for ( c=0; c< A->len_aln; c++)\r
- {\r
- r1=tolower (A->seq_al[a][c]);\r
- r2=tolower (A->seq_al[b][c]);\r
- p1=(r1!='-')?1:0;\r
- p2=(r2!='-')?1:0;\r
- p3=p1+p2;\r
- if ( p3==0)continue;\r
- if ( p3==1)gap++;\r
- if ( r1==r2)sim++;\r
- pos1+=p1;\r
- pos2+=p2;\r
- pos0+=(p3==2)?1:0;\r
- len++;\r
- }\r
-\r
- if (m==0)simm[a][b]=simm[b][a]=(pos0==0)?0:(sim*MAXID)/pos0; //sim1\r
- else if (m==1) simm[a][b]=simm[b][a]=(MIN(pos1,pos2)==0)?0:(sim*MAXID)/MIN(pos1,pos2);//sim2\r
- else if (m==2) simm[a][b]=simm[b][a]=(MAX(pos1,pos2)==0)?0:(sim*MAXID)/MAX(pos1,pos2);//sim3\r
- else if (m==3) simm[a][b]=simm[b][a]=(len==0) ?0:((len-gap)*MAXID)/len;//gap1\r
- else if (m==4) simm[a][b]=simm[b][a]=(len==0) ?0:((pos0)*MAXID)/len; //cov\r
- else if (m==5) \r
- {\r
- \r
- //Inspired from Muscle +mafft 5\r
- simm[a][b]=simm[b][a]=logid_score ( sim, len);\r
- }\r
- }\r
- }\r
- return simm;\r
-}\r
-int logid_score ( int sim, int len)\r
-{\r
- float score;\r
- \r
- if ( len==0)return (int)(0.33*(float)MAXID);\r
- \r
- score=(float)sim/(float)len;\r
- if (score>0.9) score=1.0;\r
- else score=-log10 (1.0-score);\r
- \r
- score=(score*MAXID);\r
- return score;\r
-}\r
-int ** aln2sim_mat (Alignment *A, char*mode)\r
-{\r
- \r
- \r
- if ( strstr (mode, "idmat"))return fast_aln2sim_mat(A, mode);\r
- return get_sim_aln_array(A, mode);\r
-}\r
-int ** aln2cov (Alignment *A)\r
-{\r
- int a, b, c;\r
- int r1, r2, gr1, gr2, pos0, gap;\r
- int **cov;\r
- cov=declare_int (A->nseq, A->nseq);\r
- \r
- for (a=0; a< A->nseq-1; a++)\r
- {\r
- cov[a][a]=100;\r
- for ( b=a+1; b<A->nseq; b++)\r
- {\r
- for (gap=0,pos0=0,c=0;c<A->len_aln; c++)\r
- {\r
- r1=A->seq_al[a][c];\r
- r2=A->seq_al[b][c];\r
- gr1=is_gap(r1); gr2=is_gap(r2);\r
- if ( gr1+gr2==0)pos0++;\r
- else if ( gr1+gr2<2)gap++;\r
- }\r
- cov[a][b]=cov[b][a]=((gap+pos0)==0)?0:((pos0*100)/(gap+pos0));\r
- }\r
- }\r
- return cov;\r
-}\r
-int ** get_raw_sim_aln_array (Alignment *A, char *mode)\r
-{\r
- int **w;\r
- int **M;\r
- int a, b, c, r1, r2, set, max, min;\r
- \r
- w=declare_int (A->nseq, A->nseq);\r
- if (strstr(mode, "sar"))M=NULL;\r
- else M=read_matrice (mode);\r
- \r
- HERE ("RAW STUFF");\r
- \r
- for ( set=0,a=0; a< A->nseq; a++)\r
- for (b=a; b<A->nseq; b++)\r
- {\r
- if (M)\r
- {\r
- for (c=0; c<A->len_aln; c++)\r
- {\r
- r1=A->seq_al[a][c];\r
- r2=A->seq_al[b][c];\r
- \r
- if ( !is_gap(r1) && !is_gap(r2))\r
- w[a][b]+=M[r1-'A'][r2-'A'];\r
- }\r
- }\r
- else if ( strm (mode, "sarmat2"))\r
- {\r
- w[a][b]=get_sar_sim2 (A->seq_al[a], A->seq_al[b]);\r
- }\r
- else\r
- {\r
- HERE ("ERROR: %s is an unknown mode of raw_sim\n", mode); exit (0);\r
- }\r
- \r
- w[b][a]=w[a][b];\r
- if (!set){min=max=w[a][b];set=1;}\r
- min=MIN(min,w[a][b]);\r
- max=MAX(max,w[a][b]);\r
- }\r
- for (a=0; a<A->nseq; a++)\r
- for (b=a; b<A->nseq; b++)\r
- {\r
- w[b][a]=((max-min)==0)?0:((w[b][a]-min)*100)/(max-min);\r
- w[a][b]=w[b][a];\r
- }\r
- free_int (M, -1);\r
- return w;\r
-}\r
-int ** get_sim_aln_array ( Alignment *A, char *mode)\r
- {\r
- int **w;\r
- int a, b;\r
- \r
-\r
- w=declare_int ( A->nseq, A->nseq);\r
- \r
- for ( a=0; a< A->nseq-1; a++)\r
- {\r
- for ( b=a+1; b< A->nseq; b++)\r
- {\r
-\r
- w[a][b]=w[b][a]=generic_get_seq_sim ( A->seq_al[a], A->seq_al[b], (A->cdna_cache)?A->cdna_cache[0]:NULL, mode);\r
- }\r
- }\r
- return w;\r
- }\r
-int generic_get_seq_sim ( char *seq1, char *seq2, int*cache, char *mode)\r
-{\r
-\r
- \r
- if ( strm (mode, "cdna"))\r
- return get_cdna_seq_sim ( cache, seq1, seq2,GAP_LIST, mode); \r
- else if ( strnm (mode, "ktup",4))\r
- return ktup_comparison (seq1, seq2,atoi(mode+4));\r
- else if ( strstr (mode, "sarmat2"))\r
- {\r
- \r
- return get_sar_sim2 (seq1, seq2);\r
- }\r
- else if ( strstr (mode, "sarmat"))\r
- return (int) get_sar_sim (seq1,seq2);\r
- else\r
- {\r
- return get_seq_sim ( seq1,seq2,GAP_LIST, mode);\r
- }\r
-}\r
-int *** get_winsim_aln_array ( Alignment *A,char *mode, int ***w)\r
- {\r
- int a, b;\r
- for ( a=0; a< A->nseq; a++)\r
- for ( b=0; b< A->nseq; b++)\r
- {\r
- if ( strm (mode, "cdna"))\r
- w[a][b]=get_cdna_seq_winsim ( A->cdna_cache[0], A->seq_al[a], A->seq_al[b],GAP_LIST, mode, w[a][b]); \r
- else\r
- w[a][b]=get_seq_winsim ( A->seq_al[a], A->seq_al[b],GAP_LIST, mode, w[a][b]);\r
- }\r
- return w;\r
- }\r
-\r
-Alignment * seq2profile (Sequence *S, int i)\r
-{\r
- Alignment *A;\r
- \r
- if ((A=seq2R_template_profile (S, i)))\r
- {\r
- return A;\r
- }\r
- else\r
- {\r
- char *tmp;\r
- FILE *fp;\r
- tmp=vtmpnam (NULL);\r
- fp=vfopen ( tmp, "w");\r
- fprintf (fp, ">%s\n%s\n", S->name[i], S->seq[i]);\r
- vfclose (fp);\r
- \r
- (S->T[i])->R=fill_R_template (S->name[i], tmp, S);\r
- \r
- return seq2R_template_profile (S, i);\r
- }\r
-}\r
-\r
-Alignment* aln2sub_aln_file (Alignment *A, int n, char **string)\r
-{\r
- char ***list;\r
- int a;\r
-\r
- list=vcalloc (A->nseq, sizeof (char***));\r
- if ( n==0)return A;\r
- else if (n>1)\r
- {\r
- int l;\r
- char *buf;\r
- \r
- for (l=0,a=0; a< n; a++)l+=strlen (string[a]);\r
- buf=vcalloc ( 2*n+l+1, sizeof (char));\r
- for (a=0; a< n; a++){buf=strcat (buf,string[a]), buf=strcat ( buf, " ");} \r
- list[0]=string2list (buf);\r
- vfree (buf);\r
- }\r
- else if ( file_exists (NULL,string[0]))\r
- {\r
- list=read_group (string[0]);\r
-\r
- }\r
- else\r
- {\r
- fprintf (stderr, "\nERROR: file <%s> does not exist [FATAL:%s]\n",string[0], PROGRAM);\r
- myexit (EXIT_FAILURE);\r
- } \r
-\r
- \r
- a=0;\r
- while (list[a])\r
- {\r
- int i, b;\r
- FILE *fp;\r
- n=atoi (list[a][0]);\r
- fp=vfopen (list[a][1], "w");\r
- for (b=2; b<n; b++)\r
- {\r
- i=name_is_in_list (list[a][b], A->name, A->nseq, MAXNAMES);\r
- if (n==3)ungap (A->seq_al[i]);\r
- fprintf (fp, ">%s\n%s\n", A->name[i], A->seq_al[i]); \r
- }\r
- vfclose (fp);\r
- free_char (list[a], -1);\r
- a++;\r
- }\r
- vfree(list);\r
- return A;\r
-}\r
-Sequence *remove_empty_sequence (Sequence *S)\r
-{\r
- int a, b;\r
- char *c;\r
- Sequence *NS;\r
- \r
- c=vcalloc ( S->max_len+1, sizeof (char));\r
- \r
- for (a=0, b=0; a< S->nseq; a++)\r
- {\r
- sprintf ( c, "%s",S->seq[a]);\r
- ungap (c);\r
- if ( strlen (c)==0)\r
- {\r
- //vfree (S->seq[a]);\r
- S->seq[a]=NULL;\r
- add_warning ( stderr, "WARNING: Sequence %s does not contain any residue: automatically removed from the set [WARNING:%s]",S->name[a], PROGRAM);\r
- }\r
- }\r
- NS=duplicate_sequence (S);\r
- free_sequence (S, S->nseq);\r
- vfree (c);\r
- return NS;\r
-}\r
-/*\r
- * aln2sub_seq\r
- *\r
- * Builds a list of groups and, for each group, reads the alignment stored in\r
- * the file named by the group's entry [1], keeps the sequence selected by\r
- * aln2most_similar_sequence(B, "idmat"), and removes that file.\r
- * The kept sequences are returned as seq2aln (S, NULL, RM_GAP).\r
- *\r
- *   n==0 : A is returned unchanged.\r
- *   n>1  : the n strings are joined with spaces and parsed by string2list()\r
- *          into a single group.\r
- *   else : string[0] must name an existing file, read with read_group();\r
- *          otherwise the program exits with an error.\r
- *\r
- * NOTE(review): the group entries themselves (list[a]) are not released\r
- * here, as before - confirm who owns them.\r
- */\r
-Alignment* aln2sub_seq (Alignment *A, int n, char **string)\r
-{\r
-  char ***list=NULL;\r
-  int a;\r
-  Sequence *S=NULL;\r
-\r
-  /* Nothing is allocated before this early exit. */\r
-  if ( n==0)return A;\r
-  else if (n>1)\r
-    {\r
-      int l;\r
-      char *buf;\r
-\r
-      for (l=0,a=0; a< n; a++)l+=strlen (string[a]);\r
-      buf=vcalloc ( 2*n+l+1, sizeof (char));\r
-      for (a=0; a< n; a++){strcat (buf,string[a]); strcat ( buf, " ");}\r
-\r
-      /* One group plus the NULL slot that terminates the loop below. */\r
-      list=vcalloc (2, sizeof (char**));\r
-      list[0]=string2list (buf);\r
-      vfree (buf);\r
-    }\r
-  else if ( file_exists (NULL,string[0]))\r
-    {\r
-      list=read_group (string[0]);\r
-    }\r
-  else\r
-    {\r
-      fprintf (stderr, "\nERROR: file <%s> does not exist [FATAL:%s]\n",string[0], PROGRAM);\r
-      myexit (EXIT_FAILURE);\r
-    }\r
-\r
-  a=0;\r
-  while (list && list[a])\r
-    {\r
-      int t;\r
-      Alignment *B;\r
-      Sequence *subS;\r
-\r
-      B=main_read_aln (list[a][1], NULL);\r
-      t=aln2most_similar_sequence(B, "idmat");\r
-      subS=extract_one_seq(B->name[t],0,0,B,KEEP_NAME);\r
-      S=add_sequence (subS,S,0);\r
-      free_aln (B);free_sequence (subS, -1);\r
-      vremove (list[a][1]);\r
-      a++;\r
-    }\r
-  vfree(list);\r
-  return seq2aln (S, NULL, RM_GAP);\r
-}\r
-\r
-/*\r
- * aln2collapsed_aln\r
- *\r
- * Replaces groups of sequences of A with one consensus sequence per group\r
- * and returns the resulting alignment. A is released with free_aln().\r
- *\r
- *   n==0 : A is returned unchanged.\r
- *   n>1  : the n strings form a single group: <group name> <seq1> <seq2> ...\r
- *   else : string[0] names a fasta-like file with one group per '>' line:\r
- *              ><Group name> <First Seq> <second seq> ....\r
- *          Groups must NOT overlap.\r
- *\r
- * Each group is an array of strings: [0] holds the number of strings in the\r
- * array, [1] the group name, [2..] the member names. Sequences of A that\r
- * belong to no group become singleton groups named after themselves.\r
- * Each output row is named after its group and filled with the consensus\r
- * computed by sub_aln2cons_seq_mat2() with "blosum62mt".\r
- */\r
-Alignment * aln2collapsed_aln (Alignment * A, int n, char **string)\r
-{\r
-  Alignment *B;\r
-  char ***list;\r
-  char **list2;\r
-  char *buf=NULL;\r
-  FILE *fp;\r
-  int a, b,c, ns, m, l;\r
-  int cap, rows, width;\r
-  int *collapsed;\r
-\r
-  /* Nothing is allocated before this early exit. */\r
-  if ( n==0)return A;\r
-\r
-  cap=A->nseq+1;\r
-  list=vcalloc (cap, sizeof (char**));\r
-  ns=0;\r
-\r
-  if (n>1)\r
-    {\r
-      for (l=0,a=0; a< n; a++)l+=strlen (string[a]);\r
-      buf=vcalloc ( 2*n+l+1, sizeof (char));\r
-      for (a=0; a< n; a++){strcat (buf,string[a]); strcat ( buf, " ");}\r
-\r
-      list[ns++]=string2list (buf);\r
-    }\r
-  else if ( file_exists (NULL,string[0]))\r
-    {\r
-      l=measure_longest_line_in_file (string[0])+1;\r
-      buf=vcalloc (l, sizeof (char));\r
-      fp=vfopen (string[0], "r");\r
-      while ((c=fgetc(fp))!=EOF)\r
-        {\r
-          /* c is the first character of a line. On an empty line it is the\r
-             newline itself, and reading on would swallow the next record. */\r
-          if ( c=='\n')continue;\r
-          /* The result of fgets is only tested, never stored in buf, so\r
-             the buffer can still be freed after EOF. */\r
-          if (!fgets (buf,l-1, fp))break;\r
-          if ( c=='>')\r
-            {\r
-              if ( ns==cap){cap*=2;list=vrealloc (list, cap*sizeof (char**));}\r
-              list[ns++]=string2list (buf);\r
-            }\r
-        }\r
-      vfclose (fp);\r
-    }\r
-  else\r
-    {\r
-      fprintf (stderr, "\nERROR: file <%s> does not exist [FATAL:%s]\n",string[0], PROGRAM);\r
-      myexit (EXIT_FAILURE);\r
-    }\r
-\r
-  vfree (buf); buf=NULL;\r
-\r
-  /*Identify lost sequences*/\r
-  collapsed=vcalloc (A->nseq, sizeof (int));\r
-  rows=MAX(1,A->nseq);\r
-  for ( a=0; a< ns; a++)\r
-    {\r
-      m=atoi (list[a][0]);\r
-      if ( m<3)\r
-        {\r
-          /* Entry [2] is read below as the group representative. */\r
-          fprintf (stderr, "\nERROR: group %d does not contain any sequence [FATAL:%s]\n",a+1, PROGRAM);\r
-          myexit (EXIT_FAILURE);\r
-        }\r
-      rows=MAX(rows, m-2);\r
-      for (b=2; b<m ; b++)\r
-        {\r
-          c=name_is_in_list (list[a][b], A->name, A->nseq, MAXNAMES);\r
-          if ( c>=0)collapsed[c]=1;\r
-        }\r
-    }\r
-  for ( a=0; a< A->nseq; a++)\r
-    {\r
-      if ( collapsed[a]==0)\r
-        {\r
-          if ( ns==cap){cap*=2;list=vrealloc (list, cap*sizeof (char**));}\r
-          list[ns]=declare_char (3, MAXNAMES+1);\r
-          sprintf ( list[ns][0], "3");\r
-          sprintf ( list[ns][1], "%s", A->name[a]);\r
-          sprintf ( list[ns][2], "%s", A->name[a]);\r
-          ns++;\r
-        }\r
-    }\r
-  vfree (collapsed);\r
-\r
-  /* list2 holds either one representative per group or the members of one\r
-     group, so it needs a row for whichever count is larger. */\r
-  rows=MAX(rows, ns);\r
-  width=MAX(100, MAXNAMES+1);\r
-  list2=declare_char (rows, width);\r
-\r
-  /*1 Collapse the alignment*/\r
-  for ( a=0; a< ns; a++)\r
-    {\r
-      snprintf ( list2[a], width, "%s", list[a][2]);\r
-    }\r
-  B=extract_sub_aln2 ( A, ns, list2);\r
-  /*2 Rename the sequences*/\r
-  for ( a=0; a< ns; a++)\r
-    {\r
-      sprintf ( B->name[a], "%s", list[a][1]);\r
-    }\r
-  /*replace sequence with consensus*/\r
-  for ( a=0; a< ns; a++)\r
-    {\r
-      m=atoi (list[a][0]);\r
-      for (c=0, b=2; b<m;c++, b++)\r
-        {\r
-          snprintf ( list2[c], width, "%s", list[a][b]);\r
-        }\r
-      buf=sub_aln2cons_seq_mat2 ( A,m-2,list2, "blosum62mt");\r
-      sprintf (B->seq_al[a], "%s", buf);\r
-      /* Each consensus is freed once copied; before, only the last one\r
-         was released. */\r
-      vfree (buf);\r
-    }\r
-\r
-  free_char (list2, -1);\r
-  for ( a=0; a< ns; a++)free_char (list[a], -1);\r
-  vfree (list);\r
-\r
-  free_aln (A);\r
-  B->S=aln2seq(B);\r
-  return B;\r
-}\r
-/*\r
- * aln2profile\r
- *\r
- * (Re)computes the three count tables of the profile attached to A and\r
- * returns A. The profile is created with declare_profile() when A->P is\r
- * not set yet.\r
- *\r
- *   count  : aln2count_mat (A)\r
- *   count2 : aln2count_mat2 (A)\r
- *   count3 : aln2count_mat2 () of a one-sequence copy of A whose first row\r
- *            is the "blosum62mt" consensus of A\r
- */\r
-Alignment * aln2profile (Alignment * A)\r
-{\r
-  Alignment *tmp=NULL;\r
-  char *consensus;\r
-\r
-  if (A->P==NULL)A->P=declare_profile (AA_ALPHABET,A->len_aln+1);\r
-\r
-  tmp=copy_aln (A, tmp);\r
-\r
-  /* Drop the previous tables before rebuilding them. */\r
-  free_int (A->P->count, -1);\r
-  free_int (A->P->count2, -1);\r
-  free_int (A->P->count3, -1);\r
-\r
-  A->P->count=aln2count_mat (A);\r
-  A->P->count2=aln2count_mat2 (A);\r
-\r
-  /* The working copy is reduced to the consensus alone to get count3. */\r
-  consensus=aln2cons_seq_mat (A, "blosum62mt");\r
-  sprintf (tmp->seq_al[0], "%s", consensus);\r
-  tmp->nseq=1;\r
-  A->P->count3=aln2count_mat2 (tmp);\r
-\r
-  vfree (consensus);\r
-  free_aln (tmp);\r
-\r
-  return A;\r
-}\r
-\r
-/*\r
- * aln2count_mat2: count table of the whole alignment. Passing ns=0 and a\r
- * NULL list makes sub_aln2count_mat2() use every sequence of A.\r
- */\r
-int** aln2count_mat2 ( Alignment *A)\r
-{\r
-  int **count;\r
-\r
-  count=sub_aln2count_mat2 (A, 0, NULL);\r
-  return count;\r
-}\r
-\r
-/*\r
- * sub_aln2nseq_prf\r
- *\r
- * Returns the number of selected sequences plus, for every selected\r
- * sequence that has an R template profile (seq2R_template_profile), the\r
- * number of sequences of that profile.\r
- *\r
- *   ns : number of entries in ls; 0 selects every sequence of A\r
- *   ls : indexes of the selected sequences (ignored when ns is 0)\r
- */\r
-int sub_aln2nseq_prf ( Alignment *A, int ns, int *ls)\r
-{\r
-  int a, s, n;\r
-  int all;\r
-  Alignment *R;\r
-\r
-  /* ns==0 walks 0..nseq-1 directly, so no index array has to be built. */\r
-  all=(ns==0);\r
-  if ( all)ns=A->nseq;\r
-\r
-  n=ns;\r
-  for (a=0; a<ns; a++)\r
-    {\r
-      s=(all)?a:ls[a];\r
-      if ( A->S && (R=seq2R_template_profile (A->S, A->order[s][0]))!=NULL)\r
-        {\r
-          n+=R->nseq;\r
-        }\r
-    }\r
-  return n;\r
-}\r
-\r
-/*\r
- * sub_aln2count_mat2\r
- *\r
- * Collects the rows that describe the selected sequences and returns the\r
- * table computed on them by sub_aln2count_mat3().\r
- *\r
- *   ns : number of entries in ls; 0 selects every sequence of A\r
- *   ls : indexes of the selected sequences (ignored when ns is 0)\r
- *\r
- * A sequence that has an R template profile contributes every row of that\r
- * profile. Any other sequence contributes its own aligned row\r
- * A->order[s][4]+1 times.\r
- * NOTE(review): order[s][4] looks like an extra weight for the sequence -\r
- * confirm.\r
- */\r
-int** sub_aln2count_mat2 ( Alignment *A, int ns, int *ls)\r
-{\r
-  char **p;\r
-  int **count;\r
-  int a, b, c, s;\r
-  int cap, need, all;\r
-  Alignment *R;\r
-\r
-  all=(ns==0);\r
-  if ( all)ns=A->nseq;\r
-\r
-  cap=MAX(1,ns);\r
-  p=vcalloc ( cap, sizeof (char*));\r
-\r
-  for (c=0,a=0; a<ns; a++)\r
-    {\r
-      int add;\r
-\r
-      s=(all)?a:ls[a];\r
-      R=(A->S)?seq2R_template_profile (A->S, A->order[s][0]):NULL;\r
-      add=(R)?R->nseq:A->order[s][4]+1;\r
-\r
-      /* Grow the row table before writing. It used to be sized for one\r
-         row per sequence, which overflows when a sequence is repeated. */\r
-      need=c+MAX(0,add);\r
-      if ( need>cap)\r
-        {\r
-          cap=need+(ns-a);\r
-          p=vrealloc (p, cap*sizeof (char*));\r
-        }\r
-\r
-      if ( R)\r
-        {\r
-          for (b=0; b<add; b++)p[c++]=R->seq_al[b];\r
-        }\r
-      else\r
-        {\r
-          for (b=0; b<add; b++)p[c++]=A->seq_al[s];\r
-        }\r
-    }\r
-  count=sub_aln2count_mat3 (p,c);\r
-  vfree (p);\r
-  return count;\r
-}\r
-/*\r
- * sub_aln2count_mat3\r
- *\r
- * Builds a per-column symbol count table for the ns rows of al. The number\r
- * of columns is the length of al[0].\r
- *\r
- *   count[x][0]   = number of distinct symbols in column x\r
- *   count[x][1]   = index one past the last used cell of the line\r
- *   count[x][2]   = number of gaps in column x\r
- *   count[x][n]   = symbol (lower case), n=3, 6, 9...\r
- *   count[x][n+1] = number of occurrences of that symbol\r
- *   count[x][n+2] = occurrences*100/(number of non gap rows)\r
- *\r
- * The line that follows the last column holds END_ARRAY, ns and the number\r
- * of columns. An empty or NULL input yields a table with that line only.\r
- * The multi-channel layout of earlier versions (one extra cell per\r
- * sequence after count[x][1]) is not filled.\r
- */\r
-int** sub_aln2count_mat3 (char **al, int ns)\r
-{\r
-  int **count;\r
-  int used[256];\r
-  int a, b;\r
-  int r;\r
-  int len;\r
-  int us;\r
-  int width;\r
-\r
-  for (a=0; a< 256; a++)used[a]=0;\r
-  if ( ns<0)ns=0;\r
-  len=(al && ns>0 && al[0])?strlen (al[0]):0;\r
-\r
-  /* A column with k distinct symbols uses cells up to 3*k+2. */\r
-  width=100+ns+2;\r
-  if ( width<3*MIN(ns,256)+3)width=3*MIN(ns,256)+3;\r
-\r
-  count=declare_int (len+2,width);\r
-  count[len][0]=END_ARRAY;\r
-  count[len][1]=ns;\r
-  count[len][2]=len;\r
-\r
-  for (a=0; a<len; a++)\r
-    {\r
-      for (us=ns, b=0; b<ns; b++)\r
-        {\r
-          /* The cast keeps r inside 0..255, so it is a valid argument\r
-             for tolower() and a valid index into used[]. */\r
-          r=tolower ((unsigned char)al[b][a]);\r
-\r
-          if (is_gap(r))us--;\r
-          else if (used[r])\r
-            {\r
-              count[a][used[r]*3+1]++;\r
-            }\r
-          else\r
-            {\r
-              used[r]=++count[a][0];\r
-              count[a][used[r]*3]=r;\r
-              count[a][used[r]*3+1]++;\r
-            }\r
-        }\r
-      count[a][1]=count[a][0]*3+2;\r
-      count[a][2]=ns-us;\r
-\r
-      /* This loop only runs when at least one symbol was seen, so us>0. */\r
-      for (b=3; b<count[a][1]; b+=3)\r
-        {\r
-          count[a][b+2]=(count[a][b+1]*100)/us;\r
-          used[count[a][b]]=0;\r
-        }\r
-    }\r
-#ifdef XXXXXX\r
-  HERE ("Display ");\r
-  for (a=0; a< 5 && a<len; a++)\r
-    {\r
-      fprintf ( stderr, "\n");\r
-      for ( b=3; b< count[a][1]; b+=3)\r
-        {\r
-          fprintf ( stderr, "[%c %d]", count[a][b], count[a][b+1]);\r
-        }\r
-      fprintf ( stderr, "\n");\r
-      for ( b=0; b<ns; b++)\r
-        {\r
-          fprintf ( stderr, "%c", al[b][a]);\r
-        }\r
-    }\r
-  HERE ("End of Display");\r
-#endif\r
-  return count;\r
-}\r
- \r
-/*\r
- * aln2count_mat\r
- *\r
- * Counts the residues found in each column of the alignment A (Prot/NA).\r
- * The table has sizeof (AA_ALPHABET)+2 rows and A->len_aln columns:\r
- *\r
- *   freq_mat[c-'a'][col]      = occurrences of letter c (case insensitive)\r
- *   freq_mat[alp_size][col]   = number of gaps\r
- *   freq_mat[alp_size+1][col] = number of non gap symbols\r
- *\r
- * A symbol whose letter index falls outside 0..alp_size-1 is only counted\r
- * in the non gap total.\r
- * NOTE(review): aln2random_seq() reads row 26 as the gap row, which only\r
- * matches this layout if sizeof (AA_ALPHABET) is 26 - confirm.\r
- */\r
-int** aln2count_mat ( Alignment *A)\r
-{\r
-  int a, b,x;\r
-  int **freq_mat;\r
-  int alp_size;\r
-\r
-  alp_size=sizeof (AA_ALPHABET);\r
-  freq_mat=declare_int (alp_size+2, A->len_aln);\r
-\r
-  for ( a=0; a<A->len_aln; a++)\r
-    {\r
-      for ( b=0; b< A->nseq; b++)\r
-        {\r
-          if ( is_gap ( A->seq_al[b][a]))freq_mat[alp_size][a]++;\r
-          else\r
-            {\r
-              x=tolower((unsigned char)A->seq_al[b][a])-'a';\r
-              /* Digits, '*' and similar symbols must not index the table. */\r
-              if ( x>=0 && x<alp_size)freq_mat[x][a]++;\r
-              freq_mat[alp_size+1][a]++;\r
-            }\r
-        }\r
-    }\r
-\r
-  return freq_mat;\r
-}\r
-char *aln2random_seq (Alignment *A, int pn1, int pn2, int pn3, int gn)\r
- {\r
-\r
- /* \r
- Returns a newly allocated string of A->len_aln characters, drawn column\r
- by column from the residue counts of A after noise has been mixed in.\r
-\r
- Steps:\r
- 1-Generate a new amino-acid at each position\r
- 2-Insert Gaps, using a two state (gap/residue) dynamic programming pass.\r
- Step 2 is skipped when gn is negative.\r
-\r
- Parameters:\r
- pn1= % noise type 1 (scaled by 1-1/ncat, so it varies with the number\r
- of categories found in the column)\r
- pn2= % noise type 2 (spread evenly over the amino acids)\r
- pn3= % noise type 3 (follows the blurred frequencies)\r
- gn = cost of switching between the gap and the residue state\r
-\r
- Notation used below:\r
- T =total used to express the frequencies (100)\r
- n1, n2, n3=pn1, pn2, pn3 as ratios\r
- ncat=number of non 0 categories for a given position (gap row included)\r
- ICi initial count for residue i\r
-\r
- tot_t1=T*n1*(1-1/ncat);\r
- tot_t2=T*n2;\r
- tot_t3=T*n3;\r
-\r
- Ci= ICi*(T-(tot_t1+tot_t2+tot_t3))/T + t1[i] + t2[i] + t3[i]\r
-\r
- NOTE(review): rows 0-25 of the count table are read as letters and row 26\r
- as the gap count. aln2count_mat() stores gaps in row sizeof(AA_ALPHABET),\r
- so this assumes that value is 26 - TODO confirm.\r
- */\r
- \r
- int **freq;\r
- int **count;\r
- float T, tot_t1, tot_t2,tot_t3, n1, n2, n3;\r
- float ncat;\r
- \r
- double gf;\r
- double *init_freq;\r
- double *blur_freq;\r
- double *t1, *t2,*t3;\r
- int a, b, c, x;\r
- char *seq;\r
- int tot;\r
- /*Parameters of the gap insertion pass*/\r
- \r
- int p;\r
- int AL=0; /*Allowed Transition*/\r
- int F=-100000; /*Forbidden Transition*/\r
-\r
- int GAP_TRANSITION;\r
- int IGAP=0, IAA=1;\r
- \r
- int state,best_state=0, score, best_score=0;\r
- int p_state;\r
- int e=0;\r
- int **score_tab;\r
- int **state_tab;\r
- int nstate=2;\r
- int **transitions;\r
- \r
- int max;\r
-\r
- seq=vcalloc ( A->len_aln+1, sizeof (char)); \r
- count=aln2count_mat(A); /* raw counts, read again in the gap pass */\r
- freq=aln2count_mat(A); /* second copy, rescaled and mutated below */\r
-\r
- T=100;\r
-\r
- n1=(float)pn1/100;\r
- n2=(float)pn2/100;\r
- n3=(float)pn3/100;\r
- \r
- /* Rescale to T: letters relative to the non gap rows, gaps relative to nseq.\r
- NOTE(review): the divisor is 0 for a column made of gaps only - confirm\r
- such columns cannot reach this function. */\r
- for ( a=0; a< A->len_aln; a++)\r
- {\r
- for ( b=0; b<26; b++)\r
- freq[b][a]=freq[b][a]*((T)/(A->nseq-freq[26][a]));\r
- freq[26][a]= (freq[26][a]*T)/A->nseq;\r
- }\r
-\r
- \r
- init_freq=vcalloc ( 26, sizeof (double));\r
- blur_freq=vcalloc ( 26, sizeof (double));\r
- \r
- tot_t1=tot_t2=tot_t3=0;\r
- \r
- t1=vcalloc ( 27, sizeof (double));\r
- t2=vcalloc ( 27, sizeof (double));\r
- t3=vcalloc ( 27, sizeof (double));\r
- for (a=0; a< A->len_aln; a++)\r
- {\r
-\r
- /*Compute Frequencies*/\r
- for (tot=0, b=0; b<26; b++)\r
- {\r
- if ( is_aa(b+'A'))\r
- {\r
- init_freq[b]=freq[b][a];\r
- tot+=freq[b][a];\r
- }\r
- }\r
- /*Count the number of non 0 categories (the gap row is included)*/\r
- for ( ncat=0, b=0; b<=26; b++)\r
- {\r
- ncat+=(freq[b][a]!=0)?1:0; \r
- }\r
- /*Blur the distribution with compute_matrix_p.\r
- NOTE(review): the pointer allocated above is overwritten on each pass -\r
- check whether compute_matrix_p returns a fresh buffer every time. */\r
- blur_freq=compute_matrix_p (init_freq,tot); \r
- \r
- \r
- /*compute noise 1: biased with blurred content * entropy--> keeps prosite motifs.\r
- NOTE(review): 1/ncat divides by 0 when every row of the column is 0. */\r
- tot_t1=T*n1*(1-1/ncat);\r
- for ( b=0; b< 26; b++)if ( is_aa(b+'A')){t1[b]=blur_freq[b]*(1-1/ncat)*n1;}\r
- \r
- /*Compute noise 2: completely random*/\r
- tot_t2=T*n2;\r
- for ( b=0; b< 26; b++)if ( is_aa(b+'A')){t2[b]=tot_t2/21;}\r
- \r
- /*compute noise 3: biased with the sole content(pam250mt)*/\r
- tot_t3=T*n3;\r
- for ( b=0; b<26; b++)if ( is_aa(b+'A')){t3[b]=blur_freq[b]*n3;}\r
- \r
- for ( b=0; b<26; b++)\r
- {\r
- if ( is_aa('A'+b))\r
- freq[b][a]=freq[b][a]*(T-(tot_t1+tot_t2+(tot_t3)))/T+t1[b]+t2[b]+t3[b];\r
- }\r
- \r
- /*end of the loop that mutates position a*/\r
- }\r
- \r
- vfree (blur_freq);\r
- vfree (init_freq);\r
- vfree ( t3); /* NOTE(review): t1 and t2 are not released in this function */\r
- \r
- /*1-Generate the amino acids of the new sequence*/\r
- \r
- \r
- vsrand (0);\r
- \r
- for ( a=0; a< A->len_aln; a++)\r
- {\r
-\r
- /* T becomes the column total; x is drawn in 0..T-1 and the first letter\r
- whose cumulated frequency reaches x is kept.\r
- NOTE(review): the modulo is undefined when the column total is 0. */\r
- for (T=0,b=0; b<26; b++)T+=freq[b][a];\r
- x=rand ()%((int)T);\r
- for (c=0,b=0; b<26; b++)\r
- {\r
- c+=freq[b][a];\r
- if ( c>=x)\r
- {\r
- seq[a]='A'+b;\r
- c=-1;\r
- break;\r
- }\r
- }\r
- if ( c!=-1)seq[a]='-'; /* no letter reached x */\r
- }\r
- seq[a]='\0';\r
- \r
-\r
- /*2 Generate the gaps in the new sequence*/\r
- \r
- \r
-\r
- if ( gn<0); /* negative gn: no gap is inserted */\r
- else\r
- {\r
-\r
- transitions=declare_int ( nstate, nstate);\r
- score_tab=declare_int ( A->len_aln+2, nstate );\r
- state_tab=declare_int ( A->len_aln+2, nstate );\r
- \r
- \r
- \r
- for (a=0; a<nstate;a++)\r
- for (b=0; b<nstate;b++)\r
- {transitions[a][b]=F;}\r
- \r
- GAP_TRANSITION=AL-gn;\r
-\r
- /* Staying in a state costs AL (0), switching state costs gn. */\r
- transitions[IGAP ][IGAP ]=AL;\r
- transitions[IAA][IAA]=AL;\r
- transitions[IAA ][IGAP]=GAP_TRANSITION;\r
- transitions[IGAP][IAA ]=GAP_TRANSITION;\r
- \r
- \r
- for ( p=1; p<=A->len_aln; p++){for (state=0; state< nstate; state++){score_tab[p][state]=F;state_tab[p][state]=-1;} }\r
- \r
- /* Forward pass over the columns; line 0 of score_tab is not set here and\r
- keeps the value given by declare_int. */\r
- for (p=1; p<= A->len_aln; p++)\r
- {\r
- /* max: highest letter frequency of the column, scaled by the fraction\r
- of non gap rows taken from the raw counts. */\r
- for (max=0,a=0; a<26; a++)max=MAX(max, freq[a][p-1]);\r
- max=(max*(A->nseq-count[26][p-1]))/A->nseq;\r
- \r
- for (state=0; state< nstate; state++)\r
- {\r
- \r
- /* Emission score: gap frequency or best residue frequency, minus 50. */\r
- gf=freq[26][p-1];\r
- if ( state==IGAP) e=gf-50;\r
- else if ( state==IAA ) e=max-50;\r
- for (p_state=0; p_state<nstate; p_state++)\r
- {\r
- score=(score_tab[p-1][p_state]==F)?F:(e+transitions[p_state][state]+score_tab[p-1][p_state]);\r
- if(p_state==0 || score>best_score){ best_score=score;best_state=p_state;}\r
- }\r
- score_tab[p][state]=best_score;\r
- state_tab[p][state]=best_state;\r
- }\r
- }\r
- \r
- /* p is A->len_aln+1 here: pick the best final state. */\r
- for (state=0; state<nstate; state++)\r
- {\r
- if (state==0 || score_tab[p-1][state]>best_score){best_score=score_tab[p-1][state]; best_state=state;}\r
- }\r
- \r
- /* Trace back: columns assigned to the gap state are overwritten with '-'. */\r
- for (p=A->len_aln; p>0;)\r
- {\r
- if ( best_state==IGAP)\r
- {\r
- seq[p-1]='-';\r
- }\r
- else if ( best_state==IAA)\r
- {\r
- seq[p-1]=seq[p-1]; /* residue state: the drawn letter is kept */\r
- }\r
- best_state=state_tab[p][best_state];\r
- p--;\r
- }\r
- }\r
- \r
- /* NOTE(review): count, transitions, score_tab and state_tab are not released here. */\r
- free_int (freq, -1);\r
- return seq;\r
- }\r
-\r
-/********************************************************************/\r
-/* */\r
-/* Weighting functions */\r
-/* */\r
-/* */\r
-/* */\r
-/********************************************************************/\r
-/*\r
- * master_trimseq\r
- *\r
- * Parses the trimming options in mode and returns the alignment produced\r
- * by seq2subseq3(). S is mandatory: the program stops when it is NULL.\r
- *\r
- * mode is a '_' separated list of options; it is cut in place by strtok:\r
- *   seq / aln  compare the sequences pairwise / use the alignment A\r
- *   U<n>       upper bound for pairwise similarity\r
- *   L<n>       lower bound for pairwise similarity\r
- *   n<n>       number of sequences\r
- *   N<n>       number of sequences as a percentage of the total (max 100)\r
- *   B / T      trim direction: BOTTOM / TOP\r
- *   W<name>    weight mode (default "pwsim")\r
- *   M<name>    method name, only reported in the statistics\r
- *   s          print statistics on stderr\r
- *   t          print the table of distances on stderr\r
- *   K          everything that follows is a ':' separated list of master\r
- *              sequences, given by name or as #<1 based index>\r
- * When no bound and no number of sequences is given, the upper bound is 50.\r
- */\r
-Alignment * master_trimseq( Alignment *A, Sequence *S,char *mode)\r
-{\r
-  Alignment *NA;\r
-  char *p;\r
-  int a, b;\r
-  int use_aln=0, upper_sim=0, min_nseq=0, lower_sim=0;\r
-  float f_upper_sim, f_lower_sim;\r
-  char weight_mode[1000];\r
-  char method[1000];\r
-  int statistics=0;\r
-  int trim_direction=TOP;\r
-  float **sim_weight;\r
-  int *seq_list;\r
-  int table=0;\r
-\r
-  /* Checked first: S is dereferenced right below. */\r
-  if (!S)\r
-    {\r
-      fprintf ( stderr, "\ntrimseq requires a set of sequences[FATAL:%s]\n", PROGRAM);\r
-      crash("");\r
-      return NULL;\r
-    }\r
-\r
-  /* seq_list is indexed over the sequences of A, so it must hold at least\r
-     A->nseq entries even when S is smaller. */\r
-  seq_list=vcalloc ( MAX(S->nseq, A->nseq), sizeof (int));\r
-  for ( a=0; a< A->nseq; a++)\r
-    {\r
-      seq_list[a]=1;\r
-    }\r
-\r
-  use_aln=aln_is_aligned(A);\r
-\r
-  upper_sim=(mode[0]=='\0')?50:0;\r
-  sprintf (weight_mode, "pwsim");\r
-  sprintf ( method, "clustering2");\r
-\r
-  while ( (p=strtok(mode, "_")))\r
-    {\r
-      mode=NULL;\r
-      if (strm (p, "seq"))use_aln=0;\r
-      else if ( strm(p,"aln"))use_aln=1;\r
-      else if (p[0]=='s')statistics=1;\r
-      else if (p[0]=='t')table=1;\r
-      else if (p[0]=='U')upper_sim=atoi(p+1);\r
-      else if (p[0]=='L')lower_sim=atoi(p+1);\r
-      else if (p[0]=='n')min_nseq=atoi(p+1);\r
-      else if (p[0]=='N')min_nseq=atoi(p+1)*-1;\r
-      else if (p[0]=='B')trim_direction=BOTTOM;\r
-      else if (p[0]=='T')trim_direction=TOP;\r
-      else if (p[0]=='W')snprintf (weight_mode, sizeof (weight_mode), "%s", p+1);\r
-      else if (p[0]=='M')snprintf (method, sizeof (method), "%s", p+1);\r
-      else if (p[0]=='K')\r
-        {\r
-          while ((p=strtok(NULL, ":")))\r
-            {\r
-              if ( p[0]=='#')\r
-                {\r
-                  int idx=atoi(p+1)-1;\r
-\r
-                  if ( idx>=0 && idx<A->nseq)seq_list[idx]=2;\r
-                  else add_warning ( stderr, "\nWARNING: trimseq: sequence index %s is out of range [Ignored]\n", p);\r
-                }\r
-              else if ( (a=name_is_in_list (p, A->name, A->nseq, 100))!=-1)\r
-                {\r
-                  seq_list[a]=2;\r
-                }\r
-            }\r
-        }\r
-    }\r
-\r
-  if ( !upper_sim && !min_nseq && !lower_sim)upper_sim=50;\r
-\r
-  if ( min_nseq> S->nseq)\r
-    {\r
-      min_nseq=S->nseq;\r
-    }\r
-  else if ( min_nseq<0)\r
-    {\r
-      if ( min_nseq<-100)\r
-        {\r
-          add_warning ( stderr, "\nWARNING: trimseq: Nseq(N) max_val=100%% [Automatic reset]\n");\r
-          min_nseq=-100;\r
-        }\r
-\r
-      /* Negative values are percentages of the number of sequences. */\r
-      min_nseq=(int)((float)S->nseq*((float)min_nseq/100)*-1);\r
-    }\r
-\r
-  NA=seq2subseq3 (A, S,use_aln,lower_sim,upper_sim,min_nseq,trim_direction, weight_mode,&sim_weight, seq_list );\r
-\r
-  if ( table)\r
-    {\r
-      fprintf ( stderr, "\nSIMILARITY MATRIX\n");\r
-      for ( a=0; a< A->nseq-1; a++)\r
-        for ( b=a+1; b< A->nseq; b++)\r
-          {\r
-            fprintf ( stderr, "%15s Vs %15s : %3.2f %% id\n", A->name[a], A->name[b], 100-sim_weight[a][b]);\r
-          }\r
-    }\r
-  if ( statistics)\r
-    {\r
-      /* Bounds above 100 are expressed in 1/100 of a percent; each bound\r
-         is tested on its own value. */\r
-      f_upper_sim=(upper_sim>100)?((float)upper_sim/(float)100):upper_sim;\r
-      f_lower_sim=(lower_sim>100)?((float)lower_sim/(float)100):lower_sim;\r
-\r
-      fprintf ( stderr, "\nTRIM Informations:\n");\r
-      fprintf ( stderr, "\tUse...........: %s\n",(use_aln)?"multiple_aln":"pairwise_aln");\r
-      fprintf ( stderr, "\tcluster_mode..: %s\n" ,method);\r
-      fprintf ( stderr, "\tsim_mode......: %s\n" ,weight_mode);\r
-      fprintf ( stderr, "\tlower_id_bound: %.2f%%\n" ,(f_lower_sim==0)?-1:f_lower_sim);\r
-      fprintf ( stderr, "\tupper_id_bound: %.2f%%\n",(f_upper_sim==0)?-1:f_upper_sim);\r
-      fprintf ( stderr, "\tnseq_kept.....: %d (out of %d)\n" ,NA->nseq, S->nseq);\r
-      fprintf ( stderr, "\treduction.....: %d%% of original set\n" ,(NA->nseq*100)/S->nseq);\r
-      fprintf ( stderr, "\tTrim_direction: From %s \n" ,(trim_direction==BOTTOM)?"Bottom":"Top");\r
-    }\r
-\r
-  return NA;\r
-}\r
-\r
-Alignment *sim_filter (Alignment *A, char *in_mode, char *seq)\r
-{\r
- int **sim, **cov;\r
- int *list;\r
- int *keep;\r
- int maxnseq, nseq_ratio, nc;\r
- int new_nseq;\r
- int a, s, n, k;\r
- Alignment *R;\r
- char *mode;\r
- int outlayers;\r
- int direction=1;//remove the higher than\r
- int coverage=0; //remove based on coverage\r
- static char *field;\r
- int maxsim, minsim, maxcov, mincov;\r
- \r
- if ( !field) field=vcalloc (1000, sizeof (char));\r
- \r
- mode=vcalloc ( strlen (in_mode)+10, sizeof (char));\r
- sprintf ( mode, "_%s_", in_mode);\r
- \r
- strget_param ( mode, "_I", "100", "%d", &maxsim);\r
- strget_param ( mode, "_i", "0", "%d", &minsim);\r
- strget_param ( mode, "_C", "100", "%d", &maxcov);\r
- strget_param ( mode, "_c", "0", "%d", &mincov);\r
- \r
- \r
- \r
- \r
- \r
- keep=vcalloc ( A->nseq, sizeof (int));\r
- list=vcalloc ( A->nseq, sizeof (int));\r
- \r
- \r
- \r
-\r
- \r
- \r
- if (!seq)s=0;\r
- else s=name_is_in_list (seq, A->name, A->nseq, 100);\r
- if (s==-1) \r
- {\r
- \r
- if ( s==-1)printf_exit (EXIT_FAILURE, stderr, "ERROR: %s is not a valid sequence", seq);\r
- }\r
- else\r
- keep[s]=1;\r
- \r
- //get the distances\r
- if ( strstr (mode, "_seq_"))\r
- {\r
- char **seq;\r
- int **M;\r
- \r
- M=read_matrice ("blosum62mt");\r
- seq=declare_char (A->nseq, A->len_aln+1);\r
- for (a=0; a<A->nseq; a++)\r
- {\r
- sprintf ( seq[a], "%s", A->seq_al[a]);\r
- ungap (seq[a]);\r
- }\r
- \r
- sim=declare_int (A->nseq, A->nseq);\r
- cov=declare_int (A->nseq, A->nseq);\r
- \r
- for (a=0; a<A->nseq; a++)\r
- {\r
- if ( s!=a)\r
- {\r
- sim[s][a]=sim[a][s]=idscore_pairseq(seq[s], seq[a],-12, -1,M,"sim");\r
- cov[s][a]=cov[a][s]=idscore_pairseq(seq[s], seq[a],-12, -1,M,"cov");\r
- \r
- }\r
- }\r
- free_char (seq, -1);\r
- free_int (M,-1);\r
- }\r
- else \r
- {\r
- sim=aln2sim_mat (A, "idmat");\r
- cov=aln2cov (A);\r
- }\r
- \r
- for (a=0; a< A->nseq; a++)\r
- {\r
- if (a==s)continue;\r
- else \r
- {\r
- if ( sim[s][a]>maxsim || sim[s][a]<minsim|| cov[s][a]<mincov||cov[s][a]>maxcov)keep[a]=-1;\r
- else keep[a]=1;\r
- }\r
- }\r
-\r
- for ( n=0, a=0; a< A->nseq; a++)\r
- {\r
- if ( keep[a]!=-1)\r
- {\r
- list[n++]=a;\r
- }\r
- }\r
-\r
- R=extract_sub_aln (A, n, list);\r
- free_int (sim, -1); free_int (cov, -1);vfree (list);\r
- \r
- return R;\r
-}\r
- \r
-\r
-static int find_worst_seq ( int **sim, int n, int *keep, int max, int direction);\r
-/*\r
- * simple_trimseq\r
- *\r
- * Removes sequences from A, one at a time, until find_worst_seq() reports\r
- * nothing left to remove or until no more than the requested number of\r
- * sequences remain. Returns the sub-alignment of the remaining sequences.\r
- *\r
- *   K        : sequences of K found in A by name are never removed\r
- *   seq_list : pattern matched with perl_strstr(); matching sequences are\r
- *              never removed (see option f)\r
- *   in_mode  : '_' separated options\r
- *       %<n>   similarity threshold (100 when no other limit is given)\r
- *       n<n>   maximum number of sequences\r
- *       N<n>   maximum number of sequences, as a percentage of A->nseq\r
- *       F<n>   first drop the sequences with no residue in the first or in\r
- *              the last n columns. This replaces A, and the alignment\r
- *              received from the caller is freed.\r
- *       O<n>   finally drop the sequences whose average score with the\r
- *              remaining ones is below n\r
- *       f<fld> field matched against seq_list: NAME, COMMENT or SEQ\r
- *       min    score the pairs on 100-similarity instead of similarity\r
- *       cov    use coverage instead of similarity\r
- *       seq    compare the sequences with seq2comp_mat() and "blosum62mt"\r
- *       T      reorder the sequences along a neighbour joining tree first\r
- */\r
-Alignment *simple_trimseq (Alignment *A, Alignment *K, char *in_mode, char *seq_list)\r
-{\r
-  int **sim=NULL;\r
-  int *list;\r
-  int *keep;\r
-  int maxnseq, maxsim, nseq_ratio, nc;\r
-  int new_nseq;\r
-  int a, s, n, k;\r
-  Alignment *R;\r
-  char *mode;\r
-  int outlayers;\r
-  int direction;\r
-  int coverage;\r
-  static char *field;\r
-\r
-  if ( !field) field=vcalloc (1000, sizeof (char));\r
-\r
-  mode=vcalloc ( strlen (in_mode)+10, sizeof (char));\r
-  sprintf ( mode, "_%s_", in_mode);\r
-\r
-  strget_param ( mode, "_%%", "0", "%d", &maxsim);\r
-  strget_param ( mode, "_n", "0", "%d", &maxnseq);\r
-  strget_param ( mode, "_N", "0", "%d", &nseq_ratio);\r
-  strget_param ( mode, "_F", "0", "%d", &nc);\r
-  strget_param ( mode, "_O", "0", "%d", &outlayers);\r
-  strget_param ( mode, "_f", "NAME", "%s", field);\r
-\r
-  direction=(strstr (mode, "_min"))?-1:1;\r
-  coverage=(strstr (mode, "_cov"))?1:0;\r
-\r
-  /* A limit on the number of sequences disables the similarity threshold. */\r
-  if ( nseq_ratio)\r
-    {\r
-      maxnseq=(A->nseq*nseq_ratio)/100;\r
-      maxsim=0;\r
-    }\r
-  else if ( maxnseq)\r
-    {\r
-      maxsim=0;\r
-    }\r
-  else if ( !maxsim)\r
-    {\r
-      maxsim=100;\r
-    }\r
-\r
-  keep=vcalloc ( A->nseq, sizeof (int));\r
-  list=vcalloc ( A->nseq, sizeof (int));\r
-\r
-  /*Remove Sequences that do not have at least one residue in the first and last nc columns*/\r
-  if ( nc)\r
-    {\r
-      int left, right, full_n,x, y;\r
-      int *full_list;\r
-      Alignment *F;\r
-\r
-      full_list=vcalloc ( A->nseq, sizeof (int));\r
-      full_n=0;\r
-      for (x=0; x< A->nseq; x++)\r
-        {\r
-          for ( left=0,y=0; y<MIN(A->len_aln,nc); y++)\r
-            if (!is_gap(A->seq_al[x][y]))left=1;\r
-\r
-          for ( right=0,y=MAX(0,(A->len_aln-nc)); y<A->len_aln; y++)\r
-            if (!is_gap(A->seq_al[x][y]))right=1;\r
-\r
-          if ( left && right)full_list[full_n++]=x;\r
-        }\r
-      F=extract_sub_aln (A, full_n, full_list);\r
-      free_aln (A);\r
-      vfree (full_list);\r
-      A=F;\r
-    }\r
-\r
-  /*Reorder the sequences according to the tree order: hopefully better phylogenetic coverage after trim*/\r
-  if (strstr (mode, "_T"))\r
-    {\r
-      NT_node **T;\r
-      Sequence *O;\r
-\r
-      /* The tree needs a matrix of its own: the one used for trimming is\r
-         only computed below, once the sequences are in their final order. */\r
-      sim=aln2sim_mat (A, "idmat");\r
-      sim=sim_array2dist_array ( sim, MAXID);\r
-      T=int_dist2nj_tree (sim, A->name, A->nseq, NULL);\r
-      O=tree2seq (T[3][0], NULL);\r
-      A=reorder_aln (A, O->name, O->nseq);\r
-\r
-      free_int (sim, -1);\r
-      free_sequence (O, -1);\r
-    }\r
-\r
-  if ( coverage==0)\r
-    {\r
-      if ( strstr (mode, "seq_"))sim=seq2comp_mat (aln2seq(A), "blosum62mt", "sim");\r
-      else sim=aln2sim_mat (A, "idmat");\r
-    }\r
-  else\r
-    {\r
-      if ( strstr (mode, "seq_"))sim=seq2comp_mat (aln2seq(A), "blosum62mt", "cov");\r
-      else sim=aln2cov (A);\r
-    }\r
-\r
-  if ( K && K->nseq>0)\r
-    {\r
-      for ( a=0; a< K->nseq; a++)\r
-        if ( (k=name_is_in_list (K->name[a], A->name, A->nseq, MAXNAMES+1))!=-1)\r
-          {\r
-            keep[k]=1;\r
-          }\r
-    }\r
-  if ( seq_list)\r
-    {\r
-      for ( a=0; a< A->nseq; a++)\r
-        {\r
-          if (strstr (field, "NAME") && perl_strstr (A->name[a], seq_list)){keep[a]=1;}\r
-          else if (strstr (field, "COMMENT") && A->seq_comment && perl_strstr(A->seq_comment[a], seq_list)){keep[a]=1;}\r
-          else if (strstr (field, "SEQ") && A->S && perl_strstr((A->S)->seq[a], seq_list)){keep[a]=1;}\r
-        }\r
-    }\r
-  for ( a=0; a< A->nseq; a++)\r
-    if ( keep[a]) fprintf ( stderr, "\nFORCED KEEP %s", A->name[a]);\r
-\r
-  new_nseq=A->nseq;\r
-\r
-  while ( (s=find_worst_seq (sim, A->nseq, keep, maxsim, direction))!=-1 && new_nseq>maxnseq)\r
-    {\r
-      /* A removed sequence gets -1 in its row and column of the matrix. */\r
-      for ( a=0; a< A->nseq; a++)sim[a][s]=sim[s][a]=-1;\r
-      keep[s]=-1;\r
-      new_nseq--;\r
-    }\r
-\r
-  /*Trim Outlayers*/\r
-  if (outlayers!=0)\r
-    {\r
-      int nn, b;\r
-      int *tot_avg;\r
-\r
-      tot_avg=vcalloc ( A->nseq, sizeof (int));\r
-\r
-      for (a=0; a<A->nseq; a++)\r
-        {\r
-          if ( keep[a]==-1)tot_avg[a]=-1;\r
-          else\r
-            {\r
-              for (nn=0, b=0; b< A->nseq; b++)\r
-                {\r
-                  if (a==b || keep[b]==-1)continue;\r
-                  tot_avg[a]+=sim[a][b];\r
-                  nn++;\r
-                }\r
-              tot_avg[a]=(nn==0)?-1:(tot_avg[a])/nn;\r
-            }\r
-        }\r
-      for ( a=0; a<A->nseq; a++)\r
-        {\r
-          if (tot_avg[a]!=-1 && tot_avg[a]<outlayers)\r
-            {\r
-              fprintf ( stderr, "\nREMOVED OUTLAYER: %3d %% avg similarity with remaining sequences [Seq %s]", tot_avg[a],A->name[a]);\r
-              keep[a]=-1;\r
-            }\r
-        }\r
-      vfree ( tot_avg);\r
-    }\r
-\r
-  for ( n=0, a=0; a< A->nseq; a++)\r
-    {\r
-      if ( keep[a]!=-1)\r
-        {\r
-          list[n++]=a;\r
-        }\r
-    }\r
-\r
-  R=extract_sub_aln (A, n, list);\r
-  free_int (sim, -1); vfree (list); vfree (keep); vfree (mode);\r
-\r
-  return R;\r
-}\r
- \r
-/*\r
- * find_worst_seq\r
- *\r
- * Returns the index of the sequence with the highest total of pair scores\r
- * above max, or -1 when there is none.\r
- *\r
- *   sim       : n x n matrix; negative cells are compared as they are\r
- *   keep      : per sequence flag, may be NULL. Sequences flagged 1 do not\r
- *               accumulate any score, and -1 is returned when the selected\r
- *               sequence has a non 0 flag.\r
- *   max       : threshold on the pair score\r
- *   direction : -1 scores the pairs on 100-sim against 100-max\r
- */\r
-int find_worst_seq ( int **sim, int n, int *keep,int max,int direction)\r
-{\r
-  int **sc;\r
-  int a, b, r;\r
-  int si;\r
-\r
-  if ( n<=0)return -1;\r
-\r
-  /* sc[a][0]=sequence index, sc[a][1]=accumulated score */\r
-  sc=declare_int (n, 2);\r
-  if (direction==-1)max=100-max;\r
-\r
-  for ( a=0; a< n; a++) sc[a][0]=a;\r
-  for ( a=0; a< n-1; a++)\r
-    {\r
-      for ( b=a+1; b<n; b++)\r
-        {\r
-          if (sim[a][b]>=0)si=(direction==-1)?100-sim[a][b]:sim[a][b];\r
-          else si=sim[a][b];\r
-\r
-          if ( si>max)\r
-            {\r
-              if ( !keep || keep[a]!=1)sc[a][1]+=si;\r
-              if ( !keep || keep[b]!=1)sc[b][1]+=si;\r
-            }\r
-        }\r
-    }\r
-\r
-  sort_int_inv ( sc, 2, 1, 0, n-1);\r
-  r=(sc[0][1]>0)?sc[0][0]:-1;\r
-\r
-  free_int (sc, -1);\r
-  if (r!=-1 && keep && keep[r])return -1;\r
-  return r;\r
-}\r
-\r
-/*\r
- * find_worst_seq_old\r
- *\r
- * Earlier version of find_worst_seq(). Each sequence accumulates the\r
- * similarities of its pairs that are above max (direction 1), or below max\r
- * and not negative (direction -1). The sequence with the highest total is\r
- * returned, or -1 when there is none.\r
- *\r
- *   keep : per sequence flag, may be NULL. Sequences flagged 1 do not\r
- *          accumulate, and -1 is returned when the selected sequence has a\r
- *          non 0 flag.\r
- * Any direction other than 1 and -1 returns sequence 0 (subject to keep).\r
- */\r
-int find_worst_seq_old ( int **sim, int n, int *keep,int max,int direction)\r
-{\r
-  int **sc;\r
-  int a, b, r=0;\r
-\r
-  if ( n<=0)return -1;\r
-\r
-  sc=declare_int (n, 2);\r
-\r
-  for ( a=0; a< n; a++) sc[a][0]=a;\r
-  for ( a=0; a< n-1; a++)\r
-    {\r
-      for ( b=a+1; b<n; b++)\r
-        {\r
-          int hit;\r
-\r
-          if ( direction==1)hit=(sim[a][b]>max);\r
-          else if ( direction==-1)hit=(sim[a][b]<max && sim[a][b]>=0);\r
-          else hit=0;\r
-\r
-          if ( hit)\r
-            {\r
-              if ( !keep || keep[a]!=1)sc[a][1]+=sim[a][b];\r
-              if ( !keep || keep[b]!=1)sc[b][1]+=sim[a][b];\r
-            }\r
-        }\r
-    }\r
-\r
-  if ( direction ==1) //remove max\r
-    {\r
-      sort_int_inv ( sc, 2, 1, 0, n-1);\r
-      r=(sc[0][1]>0)?sc[0][0]:-1;\r
-    }\r
-  else if ( direction ==-1)//remove min\r
-    {\r
-      sort_int_inv ( sc, 2, 1, 0, n-1);\r
-      r=(sc[0][1]>=0)?sc[0][0]:-1;\r
-      HERE ("** %d %d\n", r,sc[0][1]);\r
-    }\r
-  free_int (sc, -1);\r
-  if (r!=-1 && keep && keep[r])return -1;\r
-  return r;\r
-}\r
-\r
- \r
-Alignment * trimseq( Alignment *A, Sequence *S,char *mode)\r
- {\r
- Alignment *NA;\r
- char *p;\r
- int a, b;\r
- int use_aln=0, upper_sim=0, min_nseq=0, lower_sim=0;\r
- char weight_mode[1000];\r
- char method[1000];\r
- int statistics=0;\r
- int trim_direction=TOP;\r
- float **sim_weight;\r
- int *seq_list;\r
- int table=0;\r
- int print_name=0;\r
- float f_lower_sim, f_upper_sim;\r
- \r
- \r
-\r
- /*\r
- mode: \r
- (trim)_<seq or aln>_%<percentage of tot weight to keep>_n<number of seq to keep>_w<weight mode>\r
- */\r
- \r
-\r
- \r
- seq_list=vcalloc ( S->nseq, sizeof (int));\r
- for ( a=0; a< A->nseq; a++)\r
- {\r
- seq_list[a]=1;\r
- }\r
- \r
-\r
- use_aln=aln_is_aligned(A);\r
- \r
- \r
- if ( mode[0]=='\0')\r
- {\r
- \r
- upper_sim=50;\r
- lower_sim=0;\r
- min_nseq=0;\r
- sprintf (weight_mode, "pwsim_fragment");\r
- sprintf ( method, "clustering2");\r
- }\r
- else \r
- {\r
- \r
- upper_sim=lower_sim=min_nseq;\r
- sprintf (weight_mode, "pwsim_fragment");\r
- sprintf ( method, "clustering2");\r
- }\r
-\r
- /*\r
- U or % (deprecated) Upper bound for pairwise similarity\r
- L or m (depercated) Lower bound for pairwise similarity\r
- n max number of sequences\r
- N max number of sequences as a fraction of thet total\r
- S print Statistics\r
- T print Table of distances\r
- */\r
-\r
- \r
-\r
- while ( (p=strtok(mode, "_")))\r
- { \r
- mode=NULL;\r
- if (strm (p, "seq"))use_aln=0;\r
- else if ( strm(p,"aln"))use_aln=1;\r
- else if (p[0]=='s')statistics=1;\r
- else if (p[0]=='t')table=1;\r
- else if (p[0]=='p')print_name=1;\r
- else if (p[0]=='U')upper_sim=atoi(p+1);\r
- else if (p[0]=='L')lower_sim=atoi(p+1);\r
- else if (p[0]=='n')min_nseq=atoi(p+1);\r
- else if (p[0]=='N')min_nseq=atoi(p+1)*-1;\r
- else if (p[0]=='B')trim_direction=BOTTOM;\r
- else if (p[0]=='T')trim_direction=TOP;\r
- else if (p[0]=='W')sprintf (weight_mode, "%s", p+1);\r
- else if (p[0]=='M')sprintf (method, "%s", p+1);\r
- else if (p[0]=='K')\r
- {\r
- \r
- while ((p=strtok(NULL, ":")))\r
- {\r
- \r
- if ( (a=name_is_in_list (p, A->name, A->nseq, 100))!=-1)\r
- {\r
- seq_list[a]=2;\r
- } \r
- }\r
- }\r
- }\r
- \r
- if ( !upper_sim && !min_nseq && !lower_sim)upper_sim=50;\r
- \r
- \r
-\r
- if (!S)\r
- {\r
- fprintf ( stderr, "\ntrimseq requires a set of sequences[FATAL:%s]\n", PROGRAM);\r
- crash("");\r
- }\r
- \r
- else if ( min_nseq> S->nseq)\r
- {\r
- min_nseq=S->nseq;\r
- }\r
- else if ( min_nseq<0)\r
- {\r
- if ( min_nseq<-100)\r
- {\r
- add_warning ( stderr, "\nWARNING: trimseq: Nseq(N) max_val=100%% [Automatic reset]\n");\r
- min_nseq=-100;\r
- }\r
- \r
- min_nseq=(int)((float)S->nseq*((float)min_nseq/100)*-1);\r
- }\r
-\r
-\r
- NA=seq2subseq2 (A, S,use_aln,lower_sim,upper_sim,min_nseq,trim_direction, weight_mode,&sim_weight, seq_list );\r
- \r
- if ( table)\r
- {\r
- fprintf ( stderr, "\nSIMILARITY MATRIX\n");\r
- for ( a=0; a< A->nseq-1; a++)\r
- for ( b=a+1; b< A->nseq; b++)\r
- {\r
- fprintf ( stderr, "%15s Vs %15s : %3.2f %% id\n", A->name[a], A->name[b], 100-sim_weight[a][b]);\r
- }\r
- }\r
- \r
- NA=seq_name2removed_seq_name(S, NA,sim_weight);\r
-\r
- if ( print_name)\r
- {\r
- fprintf ( stderr, "\nList of sequences with their closest removed neighbors\n");\r
- for ( a=0; a< NA->nseq; a++)fprintf ( stderr, "\n%s: %s\n", NA->name[a], NA->seq_comment[a]);\r
- }\r
- \r
- if ( statistics)\r
- {\r
- f_lower_sim=(lower_sim>100)?(float)lower_sim/100:lower_sim;\r
- f_upper_sim=(upper_sim>100)?(float)upper_sim/100:upper_sim;\r
-\r
- fprintf ( stderr, "\nTRIM seq Informations:\n");\r
- fprintf ( stderr, "\tUse...........: %s\n",(use_aln)?"multiple_aln":"pairwise_aln");\r
- fprintf ( stderr, "\tcluster_mode..: %s\n" ,method);\r
- fprintf ( stderr, "\tsim_mode......: %s\n" ,weight_mode);\r
- fprintf ( stderr, "\tlower_id_bound: %.2f%%\n" ,(f_lower_sim==0)?-1:f_lower_sim);\r
- fprintf ( stderr, "\tupper_id_bound: %.2f%%\n",(f_upper_sim==0)?-1:f_upper_sim);\r
- fprintf ( stderr, "\tnseq_kept.....: %d (out of %d)\n" ,NA->nseq, S->nseq);\r
- fprintf ( stderr, "\treduction.....: %d%% of original set\n" ,(NA->nseq*100)/S->nseq);\r
- fprintf ( stderr, "\tTrim_direction: From %s \n" ,(trim_direction==BOTTOM)?"Bottom":"Top");\r
- }\r
-\r
- return NA;\r
- }\r
- \r
-Alignment * tc_trimseq( Alignment *A, Sequence *S,char *mode)\r
- {\r
- Alignment *NA;\r
- Sequence *TS;\r
- char *trimfile, *alnfile;\r
- int *seq_list;\r
- int a, nseq=0, sim=0;\r
- char *p;\r
- char command[100000];\r
- char keep_list[10000];\r
- \r
- int top, bottom, middle, pmiddle;\r
- \r
- keep_list[0]='\0';\r
- \r
- seq_list=vcalloc ( S->nseq, sizeof (int));\r
- for ( a=0; a< A->nseq; a++)\r
- {\r
- seq_list[a]=1;\r
- }\r
- \r
- trimfile=vtmpnam (NULL);\r
- alnfile=vtmpnam (NULL);\r
- if ( !aln_is_aligned (A))\r
- {\r
- fprintf ( stderr, "\ntrimTC: computation of an Approximate MSA [");\r
- A=compute_tcoffee_aln_quick ( A, NULL);\r
- fprintf ( stderr, "DONE]\n");\r
- }\r
- output_clustal_aln (alnfile, A);\r
- \r
- \r
- while ( (p=strtok(mode, "#")))\r
- { \r
- mode=NULL;\r
-\r
- \r
- if (p[0]=='%' || p[0]=='S')sim=(p[1]=='%')?atoi(p+2):atoi(p+1);\r
- else if (p[0]=='n' || p[0]=='N')nseq=atoi(p+1);\r
- else if (p[0]=='K')\r
- {\r
- if ( (a=name_is_in_list (p+1, A->name, A->nseq, 100))!=-1)\r
- {\r
- seq_list[a]=2;\r
- } \r
- \r
- }\r
- }\r
- if ( nseq ==0 && sim ==0)\r
- {\r
- fprintf ( stderr, "\nERROR: trimTC\nIndicate the maximum number of sequences Nnseq\nOR the maximum average similarity of the chosen sequencesSx\nEX: +trimTC S20 OR +trimTC N5"); \r
- fprintf ( stderr, "\n[FATAL:%s]", PROGRAM);\r
- myexit (EXIT_FAILURE);\r
- }\r
- \r
- for ( a=0; a<A->nseq; a++)if (seq_list[a]==2){strcat ( keep_list, A->name[a]);strcat ( keep_list," ");}\r
- \r
- if ( sim)\r
- {\r
- sprintf ( command , "%s -infile %s -trim -trimfile=%s -split_score_thres %d -convert -iterate 0 ",get_string_variable("t_coffee"), alnfile, trimfile,sim);\r
- if ( keep_list[0]){strcat ( command, " -seq_to_keep ");strcat ( command, keep_list);}\r
- my_system ( command);\r
- TS=read_sequences (trimfile);\r
- }\r
- else if ( nseq && A->nseq>nseq)\r
- {\r
- \r
- top=100;bottom=0;\r
- pmiddle=0;middle=50;\r
- \r
- sprintf ( command , "%s -infile %s -trim -trimfile=%s -split_score_thres %d -convert -iterate 0",get_string_variable("t_coffee"), alnfile, trimfile,middle);\r
- if ( keep_list[0]){strcat ( command, " -seq_to_keep ");strcat ( command, keep_list);}\r
- my_system ( command);\r
- \r
- TS=read_sequences (trimfile);\r
- fprintf ( stderr, "\n\tTrimTC: Sim %d Nseq %d\t",middle, TS->nseq);\r
- \r
- if ( TS->nseq>nseq)top=middle;\r
- else if ( TS->nseq<nseq)bottom=middle;\r
- pmiddle=middle;\r
- middle=(top-bottom)/2+bottom;\r
- \r
- while (TS->nseq!=nseq && pmiddle!=middle)\r
- {\r
- \r
- sprintf ( command , "%s -infile %s -trim -trimfile=%s -split_score_thres %d -convert -iterate 0 ",get_string_variable("t_coffee"), alnfile, trimfile,middle);\r
- if ( keep_list[0]){strcat ( command, " -seq_to_keep ");strcat ( command, keep_list);}\r
- my_system ( command);\r
- free_sequence (TS, -1);\r
- TS=read_sequences (trimfile);\r
- fprintf ( stderr, "\n\tTrimTC: Sim %d Nseq %d\t", middle, TS->nseq);\r
- \r
- if ( TS->nseq>nseq)top=middle;\r
- else if ( TS->nseq<nseq)bottom=middle;\r
- pmiddle=middle;\r
- middle=(top-bottom)/2+bottom;\r
- }\r
- }\r
- else\r
- {\r
- TS=aln2seq (A);\r
- }\r
- NA=seq2aln (TS, NULL, 1);\r
- vremove ( alnfile);\r
- fprintf ( stderr, "\n");\r
- \r
- return NA;\r
- } \r
-\r
-/*\r
-  seq2subseq3: filter a sequence set around its "master" sequences.\r
-\r
-  seq_list[x]==2 marks a master sequence, seq_list[x]==1 a candidate and\r
-  seq_list[x]==0 a discarded sequence. Every candidate whose similarity\r
-  (100-weight) to any master is above upper_sim or below lower_sim is\r
-  discarded (seq_list is updated in place).\r
-\r
-  A, S          : alignment and sequence set; A is always dereferenced.\r
-  use_aln       : when set, the weights and the output rows come from A.\r
-  int_lower_sim,\r
-  int_upper_sim : thresholds; values above 100 are divided by 100.\r
-  min_nseq,\r
-  trim_direction: not used by this variant.\r
-  weight_mode   : forwarded to get_weight.\r
-  sim_weight    : sim_weight[0] receives the matrix returned by get_weight.\r
-\r
-  Returns a new alignment built from the retained sequences.\r
-*/\r
-Alignment* seq2subseq3( Alignment *A, Sequence *S,int use_aln, int int_lower_sim,int int_upper_sim, int min_nseq, int trim_direction, char *weight_mode, float ***sim_weight, int *seq_list)\r
-{\r
-  int a, b;\r
-  int new_nseq;\r
-  int seq_width;\r
-\r
-  /*OUTPUT*/\r
-  char **seq, **name;\r
-  Sequence *NS;\r
-  Alignment *NA;\r
-  float sim, lower_sim, upper_sim;\r
-\r
-  lower_sim=(int_lower_sim>100)?(float)int_lower_sim/100:int_lower_sim;\r
-  upper_sim=(int_upper_sim>100)?(float)int_upper_sim/100:int_upper_sim;\r
-\r
-  sim_weight[0]=get_weight ((use_aln)?A:NULL, S, weight_mode);\r
-\r
-  /* Rows of seq may receive aligned (gapped) strings when use_aln is set,\r
-     so they must be able to hold a full alignment column count as well. */\r
-  seq_width=S->max_len;\r
-  if ( use_aln && A && A->len_aln>seq_width)seq_width=A->len_aln;\r
-\r
-  name=declare_char (S->nseq, (MAXNAMES+1));\r
-  seq= declare_char (S->nseq, seq_width+1);\r
-\r
-  /*\r
-    Remove every candidate that is more than upper_sim or less than lower_sim\r
-    similar to a master sequence (seq_list[x]==2).\r
-  */\r
-  new_nseq=A->nseq;\r
-\r
-  for (a=0; a< A->nseq; a++)\r
-    {\r
-      if ( seq_list[a]!=2)continue;\r
-\r
-      for ( b=0; b< A->nseq;b++)\r
-        {\r
-          sim=100-sim_weight[0][a][b];\r
-          if (seq_list[b]==1 && (sim>upper_sim || sim<lower_sim))\r
-            {\r
-              seq_list[b]=0;\r
-              new_nseq--;\r
-            }\r
-        }\r
-    }\r
-\r
-  /*Prepare the new sequence List*/\r
-  for (b=0, a=0; a<S->nseq; a++)\r
-    {\r
-      if ( seq_list[a])\r
-        {\r
-          sprintf ( name[b], "%s", S->name[a]);\r
-          sprintf ( seq[b] , "%s",(use_aln)?A->seq_al[a]: S->seq[a] );\r
-          b++;\r
-        }\r
-    }\r
-\r
-  NS=fill_sequence_struc (new_nseq,seq,name);\r
-  NA=seq2aln(NS,NULL,1);\r
-\r
-  if ( use_aln && A)\r
-    {\r
-      /* Re-install the aligned rows, then let ungap_aln process the result. */\r
-      NA=realloc_aln2 ( NA,A->max_n_seq,A->len_aln+1);\r
-\r
-      for (b=0, a=0; a<S->nseq; a++)\r
-        {\r
-          if ( seq_list[a])\r
-            {\r
-              sprintf ( NA->seq_al[b] , "%s",A->seq_al[a]);\r
-              b++;\r
-            }\r
-        }\r
-\r
-      NA->len_aln=A->len_aln;\r
-      ungap_aln(NA);\r
-    }\r
-\r
-  return NA;\r
-}\r
-/*\r
-  seq2subseq2: trim a sequence set in three passes driven by extreme_seq.\r
-\r
-    1 remove outliers          (closest relative below lower_sim)\r
-    2 remove close relatives   (closest relative above upper_sim)\r
-    3 if still more than min_nseq sequences (and min_nseq>0), keep removing\r
-      the sequence designated by extreme_seq(trim_direction, ...)\r
-\r
-  Only sequences with seq_list[x]==1 are ever removed; seq_list is updated in\r
-  place (0 == removed). No pass goes below min_nseq sequences.\r
-\r
-  int_lower_sim / int_upper_sim above 100 are divided by 100. A threshold of\r
-  0 disables the corresponding pass. sim_weight[0] receives the matrix\r
-  returned by get_weight. A is always dereferenced.\r
-\r
-  Returns a new alignment built from the retained sequences.\r
-*/\r
-Alignment* seq2subseq2( Alignment *A, Sequence *S,int use_aln, int int_lower_sim,int int_upper_sim, int min_nseq, int trim_direction, char *weight_mode, float ***sim_weight, int *seq_list)\r
-{\r
-  int a, b;\r
-  int new_nseq;\r
-  int seq_index=0;\r
-  int seq_width;\r
-  /*OUTPUT*/\r
-  char **seq, **name;\r
-  Sequence *NS;\r
-  Alignment *NA;\r
-  float lower_sim, upper_sim;\r
-\r
-  lower_sim=(int_lower_sim>100)?(float)int_lower_sim/100:int_lower_sim;\r
-  upper_sim=(int_upper_sim>100)?(float)int_upper_sim/100:int_upper_sim;\r
-\r
-  sim_weight[0]=get_weight ((use_aln)?A:NULL, S, weight_mode);\r
-\r
-  /* Rows of seq may receive aligned (gapped) strings when use_aln is set,\r
-     so they must be able to hold a full alignment column count as well. */\r
-  seq_width=S->max_len;\r
-  if ( use_aln && A && A->len_aln>seq_width)seq_width=A->len_aln;\r
-\r
-  name=declare_char (S->nseq, (MAXNAMES+1));\r
-  seq= declare_char (S->nseq, seq_width+1);\r
-\r
-  new_nseq=A->nseq;\r
-\r
-  /*1 Remove outliers*/\r
-  while ( lower_sim && (extreme_seq(BOTTOM,A,sim_weight[0],seq_list, &seq_index) <lower_sim) && ((new_nseq)>min_nseq) && seq_index!=-1)\r
-    {\r
-      if ( seq_list[seq_index]==1)\r
-        {\r
-          seq_list[seq_index]=0;\r
-          new_nseq--;\r
-        }\r
-    }\r
-\r
-  /*2 Remove close relatives*/\r
-  while ( upper_sim && (extreme_seq(TOP, A,sim_weight[0],seq_list, &seq_index)>upper_sim) && ((new_nseq)>min_nseq)&& seq_index!=-1)\r
-    {\r
-      if ( seq_list[seq_index]==1)\r
-        {\r
-          seq_list[seq_index]=0;\r
-          new_nseq--;\r
-        }\r
-    }\r
-\r
-  /*3 Remove extra sequences*/\r
-  while ( min_nseq>0 && new_nseq>min_nseq && seq_index!=-1)\r
-    {\r
-      extreme_seq(trim_direction, A,sim_weight[0],seq_list, &seq_index);\r
-\r
-      if ( seq_index==-1)break;\r
-      if ( seq_list[seq_index]==1)\r
-        {\r
-          seq_list[seq_index]=0;\r
-          new_nseq--;\r
-        }\r
-    }\r
-\r
-  /*Prepare the new sequence List*/\r
-  for (b=0, a=0; a<S->nseq; a++)\r
-    {\r
-      if ( seq_list[a])\r
-        {\r
-          sprintf ( name[b], "%s", S->name[a]);\r
-          sprintf ( seq[b] , "%s",(use_aln)?A->seq_al[a]: S->seq[a] );\r
-          b++;\r
-        }\r
-    }\r
-\r
-  NS=fill_sequence_struc (new_nseq,seq,name);\r
-  NA=seq2aln(NS,NULL,1);\r
-\r
-  if ( use_aln && A)\r
-    {\r
-      /* Re-install the aligned rows, then let ungap_aln process the result. */\r
-      NA=realloc_aln2 ( NA,A->max_n_seq,A->len_aln+1);\r
-\r
-      for (b=0, a=0; a<S->nseq; a++)\r
-        {\r
-          if ( seq_list[a])\r
-            {\r
-              sprintf ( NA->seq_al[b],"%s",A->seq_al[a]);\r
-              b++;\r
-            }\r
-        }\r
-\r
-      NA->len_aln=A->len_aln;\r
-      ungap_aln(NA);\r
-    }\r
-\r
-  return NA;\r
-}\r
-\r
-/*\r
-  extreme_seq: scan the removable sequences (seq_list[x]==1) and, for each of\r
-  them, measure the similarity (100-sim_weight) to its closest relative among\r
-  all sequences still in the list (seq_list[y]!=0). That value never goes\r
-  below 0.\r
-\r
-    direction==BOTTOM: report the sequence whose closest relative is the\r
-                       most distant (lowest value)\r
-    direction==TOP   : report the sequence whose closest relative is the\r
-                       closest (highest value)\r
-\r
-  The index is written to seq_index[0] (-1 when nothing qualifies or when the\r
-  direction is unknown) and the matching similarity is returned (101 for\r
-  BOTTOM / -1 for TOP when nothing qualifies, -1 for an unknown direction).\r
-*/\r
-float extreme_seq (int direction, Alignment *A,float **sim_weight,int *seq_list, int *seq_index)\r
-{\r
-  int i, j;\r
-  int idx_hi=-1, idx_lo=-1;\r
-  float hi=-1, lo=101;\r
-\r
-  *seq_index=-1;\r
-\r
-  for (i=0; i<A->nseq; i++)\r
-    {\r
-      float nearest=0;\r
-\r
-      if (seq_list[i]==1)\r
-        {\r
-          /* similarity of i to its closest surviving relative */\r
-          for (j=0; j<A->nseq; j++)\r
-            {\r
-              float s;\r
-\r
-              if (j==i || seq_list[j]==0)continue;\r
-              s=100-sim_weight[i][j];\r
-              if (s>nearest)nearest=s;\r
-            }\r
-\r
-          if (nearest>hi){idx_hi=i; hi=nearest;}\r
-          if (nearest<lo){idx_lo=i; lo=nearest;}\r
-        }\r
-    }\r
-\r
-  if (direction==BOTTOM)\r
-    {\r
-      *seq_index=idx_lo;\r
-      return lo;\r
-    }\r
-  if (direction==TOP)\r
-    {\r
-      *seq_index=idx_hi;\r
-      return hi;\r
-    }\r
-\r
-  *seq_index=-1;\r
-  return -1;\r
-}\r
- \r
- \r
- \r
- \r
-/*\r
-  seq2subseq1: greedy selection of a subset of S.\r
-\r
-  mode:\r
-  (trim)_<seq or aln>_%<percentage of tot weight to keep>_n<number of seq to keep>_w<weight mode>\r
-\r
-  The first sequence is the one with the highest summed (100-weight) over all\r
-  sequences. Each following pick is the unused sequence whose smallest\r
-  pw_weight to the already chosen ones is the largest. After each pick the\r
-  "set score" is recomputed; selection stops as soon as it exceeds percent\r
-  (when percent!=0) or the number of picks exceeds max_nseq (when\r
-  max_nseq!=0).\r
-\r
-  use_aln only selects the input given to get_weight; the output sequences\r
-  are always copied from S->seq. ms is not used. Progress is reported on\r
-  stderr. Returns a new alignment built from the chosen sequences.\r
-*/\r
-Alignment* seq2subseq1( Alignment *A, Sequence *S,int use_aln, int percent,int max_nseq, int ms,char *weight_mode)\r
-  {\r
-    float **pw_weight,**sim_weight, **seq_weight;\r
-    int a,b,c,d;\r
-    float sum, chosen,last_chosen, last_nchosen,nchosen;\r
-    int condition1, condition2;\r
-    Sequence *NS;\r
-    Alignment *NA;\r
-    char **name, **seq;\r
-    float score, best_score;\r
-    int best_seq=0;\r
-    int *seq_list, *used_seq_list;\r
-\r
-    sim_weight=get_weight ((use_aln)?A:NULL, S, weight_mode);\r
-    pw_weight=declare_float (S->nseq, S->nseq);\r
-    seq_weight=declare_float ( S->nseq, 2);\r
-\r
-    /* seq_weight[a][0]: 100 minus the average weight of a against the others */\r
-    for (best_score=0,a=0; a<S->nseq; a++)\r
-      {\r
-        for ( b=0; b<S->nseq; b++)\r
-          {\r
-            if ( a==b)continue;\r
-            seq_weight[a][0]+=sim_weight[a][b];\r
-          }\r
-        seq_weight[a][0]=seq_weight[a][0]/(S->nseq-1);\r
-        score=seq_weight[a][0]=100-seq_weight[a][0];\r
-\r
-        if ( score>best_score)\r
-          {\r
-            best_seq=a;\r
-            best_score=score;\r
-          }\r
-      }\r
-\r
-    /* pairwise weight scaled by the two per-sequence weights */\r
-    for (a=0; a<S->nseq; a++)\r
-      {\r
-        for ( b=0; b<S->nseq; b++)\r
-          {\r
-            if ( a==b)continue;\r
-            pw_weight[a][b]=sim_weight[a][b]*seq_weight[a][0]*seq_weight[b][0]/(100*100);\r
-          }\r
-      }\r
-\r
-    seq_list=vcalloc ( S->nseq, sizeof (int));\r
-    used_seq_list=vcalloc ( S->nseq, sizeof (int));\r
-\r
-    name=declare_char (S->nseq, (MAXNAMES+1));\r
-    seq= declare_char (S->nseq, S->max_len+1);\r
-\r
-    /*compute the normalization factor*/\r
-    for (sum=0,d=0; d< S->nseq; d++)\r
-      {\r
-        for (score=0,c=0; c<S->nseq; c++)\r
-          {\r
-            if ( c!=d)\r
-              score=MAX(score, 100-sim_weight[c][d]);\r
-          }\r
-        sum+=score;\r
-      }\r
-    sum=sum/S->nseq;\r
-\r
-    /*choose the first sequence */\r
-    for ( best_score=0,a=0; a< S->nseq; a++)\r
-      {\r
-        for (score=0, b=0; b< S->nseq; b++)\r
-          {\r
-            score+=100-sim_weight[a][b];\r
-          }\r
-        if ( score>best_score)\r
-          {\r
-            best_seq=a;\r
-            best_score=score;\r
-          }\r
-      }\r
-\r
-    last_chosen=chosen=((best_score/S->nseq)*100)/sum;\r
-    nchosen=last_nchosen=1;\r
-    seq_list[0]=best_seq;\r
-    used_seq_list[best_seq]=1;\r
-\r
-    sprintf ( name[0],"%s", S->name[seq_list[0]]);\r
-    sprintf ( seq[0],"%s", S->seq[seq_list[0]]);\r
-\r
-    fprintf ( stderr, "\nTRIM:\n");\r
-    fprintf ( stderr, "\n1-Chosen Sequences\n");\r
-    /*Assemble the list of sequences*/\r
-    for (a=1; a< S->nseq; a++)\r
-      {\r
-        /* pick the unused sequence whose lowest pw_weight to the chosen set is the highest */\r
-        for (best_score=0,b=0; b< S->nseq; b++)\r
-          {\r
-            if (used_seq_list[b])continue;\r
-\r
-            score=pw_weight[seq_list[0]][b]+1;\r
-            for (c=0; c<a; c++)\r
-              score=MIN(score,pw_weight[seq_list[c]][b]);\r
-\r
-            if ( score>=best_score)\r
-              {\r
-                best_seq=b;\r
-                best_score=score;\r
-              }\r
-          }\r
-        seq_list[a]=best_seq;\r
-        used_seq_list[best_seq]=1;\r
-\r
-        /* set score of the selection including the new pick */\r
-        for ( chosen=0,d=0; d< S->nseq; d++)\r
-          {\r
-            for (score=0, c=0; c<=a; c++)\r
-              {\r
-                if ( seq_list[c]!=d)\r
-                  score=MAX(score, 100-sim_weight[seq_list[c]][d]);\r
-              }\r
-            chosen+=score;\r
-          }\r
-\r
-        chosen=((chosen/S->nseq)*100)/sum;\r
-        nchosen=a+1;\r
-\r
-        condition1= (int)chosen<=(int)percent || !percent;\r
-        condition2=(nchosen)<=max_nseq || !max_nseq;\r
-\r
-        if (!(condition1 && condition2))break;\r
-\r
-        fprintf ( stderr, "\tADD %s (set score: %.2f %%)\n", S->name[seq_list[a]], chosen);\r
-        sprintf ( name[a],"%s", S->name[seq_list[a]]);\r
-        sprintf ( seq[a],"%s", S->seq[seq_list[a]]);\r
-\r
-        last_chosen=chosen;\r
-        last_nchosen=nchosen;\r
-      }\r
-\r
-    NS=fill_sequence_struc (last_nchosen,seq,name);\r
-    NA=seq2aln(NS,NULL,1);\r
-    fprintf ( stderr, "\n2-Informations:\n");\r
-    fprintf ( stderr, "\tUse...........: %s\n",(use_aln)?"multiple_aln":"pairwise_aln");\r
-    fprintf ( stderr, "\tweight_mode...: %s\n" ,weight_mode);\r
-    fprintf ( stderr, "\tpercent_weight: %.2f%% (max=%d%%)\n",last_chosen,percent);\r
-    fprintf ( stderr, "\tn_seq.........: %d\n" ,NS->nseq);\r
-    fprintf ( stderr, "\treduction.....: %d%% of original set\n" ,(NS->nseq*100)/S->nseq);\r
-\r
-    /* The index arrays are purely local scratch space: release them. */\r
-    /* NOTE(review): pw_weight, seq_weight and sim_weight are also local but still not released here - confirm the matching deallocator. */\r
-    vfree (seq_list);\r
-    vfree (used_seq_list);\r
-\r
-    return NA;\r
-  }\r
-float ** get_weight ( Alignment *A, Sequence *S, char *mode)\r
-{\r
- char *aln_name;\r
- char *weight_name;\r
- char *seq_name;\r
- char command[LONG_STRING];\r
- char program[LONG_STRING];\r
- float **weight;\r
- FILE *fp;\r
- int c;\r
- \r
- if ( !mode || !mode[0] || strm (mode, "msa"))\r
- {\r
- if ( getenv ( "SEQ2MSA_WEIGHT")==NULL)sprintf (program, "%s",SEQ2MSA_WEIGHT);\r
- else sprintf ( program, "%s", (getenv ( "SEQ2MSA_WEIGHT"))); \r
- }\r
- else if ( strm(mode, "pwsim") ||strm(mode, "pwsim_fragment") )\r
- {\r
- return seq2pwsim (A, S, mode);\r
- }\r
- else\r
- {\r
- if (getenv (mode))sprintf ( program, "%s", (getenv (mode)));\r
- else fprintf ( stderr, "\nERROR: %s is not a valid mode for weight computation [FATAL:%s]", mode, PROGRAM);\r
- }\r
-\r
- /*MSA weights*/\r
- seq_name=vtmpnam(NULL);\r
- aln_name=vtmpnam(NULL);\r
- weight_name=vtmpnam(NULL);\r
- weight=declare_float (S->nseq+1, 2);\r
- \r
-\r
- \r
- if (A)\r
- {\r
- output_clustal_aln (seq_name,A);\r
- output_fasta_seq (aln_name,A);\r
- sprintf ( command, "%s %s -i %s -w %s", program, seq_name, aln_name, weight_name);\r
- }\r
- else\r
- {\r
- A=seq2aln(S,A,1);\r
- output_fasta_seq (seq_name,A);\r
- sprintf ( command, "%s %s -w %s", program, seq_name, weight_name);\r
- }\r
- \r
- \r
- my_system ( command);\r
- \r
- fp=vfopen( weight_name, "r");\r
- while ( (c=fgetc(fp))!='$');\r
- c=fgetc(fp);\r
- c=0;\r
- while ( (fscanf (fp, "%*s %f\n",&(weight[c][1])))==1)\r
- {weight[c][0]=c;c++;}\r
- vfclose (fp);\r
- \r
- \r
- return weight;\r
-}\r
-\r
-/*\r
-  seq2pwsim: build a symmetric S->nseq x S->nseq matrix of pairwise distances,\r
-  W[i][j]=(1-identity)*100, with 0 on the diagonal.\r
-\r
-  Without an alignment (A==NULL) every pair is aligned with\r
-  align_two_sequences; otherwise the rows of A are compared column by column.\r
-  identity = identical non-gap columns / columns where both residues are not\r
-  gaps (a zero denominator is replaced by 1). In mode "pwsim_fragment" the\r
-  denominator of a freshly aligned pair is the full pairwise alignment length.\r
-*/\r
-float **seq2pwsim ( Alignment *A, Sequence *S, char *mode)\r
-{\r
-  int i, j, col;\r
-  float **W;\r
-\r
-  W=declare_float (S->nseq, S->nseq);\r
-\r
-  for (i=0; i<S->nseq; i++)\r
-    {\r
-      for (j=i; j<S->nseq; j++)\r
-        {\r
-          float ident=0, paired=0;\r
-\r
-          if (i==j)\r
-            {\r
-              ident=1;\r
-            }\r
-          else if (A==NULL)\r
-            {\r
-              Alignment *P;\r
-\r
-              P=align_two_sequences ((S)->seq[i], (S)->seq[j],"pam250mt", -10, -1, "fasta_pair_wise");\r
-              for (col=0; col<P->len_aln; col++)\r
-                {\r
-                  if (P->seq_al[0][col]==P->seq_al[1][col] && !is_gap(P->seq_al[0][col]))ident+=1;\r
-                  if (!is_gap(P->seq_al[0][col]) && !is_gap(P->seq_al[1][col]))paired+=1;\r
-                }\r
-              if (strm ( mode, "pwsim_fragment"))paired=P->len_aln;\r
-\r
-              ident=ident/((paired==0)?1:paired);\r
-              free_aln(P);\r
-            }\r
-          else\r
-            {\r
-              for (col=0; col<A->len_aln; col++)\r
-                {\r
-                  if (A->seq_al[i][col]==A->seq_al[j][col] && !is_gap(A->seq_al[i][col]))ident+=1;\r
-                  if (!is_gap(A->seq_al[i][col]) && !is_gap(A->seq_al[j][col]))paired+=1;\r
-                }\r
-              ident=ident/((paired==0)?1:paired);\r
-            }\r
-\r
-          W[i][j]=W[j][i]=(1-ident)*100;\r
-        }\r
-    }\r
-\r
-  return W;\r
-}\r
- \r
-/*\r
-  seq2pwsim_fragment: build a symmetric S->nseq x S->nseq matrix of pairwise\r
-  distances, W[i][j]=(1-identity)*100, with 0 on the diagonal.\r
-\r
-  Without an alignment (A==NULL) every pair is aligned with\r
-  align_two_sequences; otherwise the rows of A are compared column by column.\r
-  identity = identical non-gap columns / columns where both residues are not\r
-  gaps (a zero denominator is replaced by 1). mode is not used.\r
-*/\r
-float **seq2pwsim_fragment ( Alignment *A, Sequence *S, char *mode)\r
-{\r
-  int i, j, col;\r
-  float **W;\r
-\r
-  W=declare_float (S->nseq, S->nseq);\r
-\r
-  for (i=0; i<S->nseq; i++)\r
-    {\r
-      for (j=i; j<S->nseq; j++)\r
-        {\r
-          float ident=0, paired=0;\r
-\r
-          if (i==j)\r
-            {\r
-              ident=1;\r
-            }\r
-          else if (A==NULL)\r
-            {\r
-              Alignment *P;\r
-\r
-              P=align_two_sequences ((S)->seq[i], (S)->seq[j],"pam250mt", -10, -1, "fasta_pair_wise");\r
-              for (col=0; col<P->len_aln; col++)\r
-                {\r
-                  if (P->seq_al[0][col]==P->seq_al[1][col] && !is_gap(P->seq_al[0][col]))ident+=1;\r
-                  if (!is_gap(P->seq_al[0][col]) && !is_gap(P->seq_al[1][col]))paired+=1;\r
-                }\r
-\r
-              ident=ident/((paired==0)?1:paired);\r
-              free_aln(P);\r
-            }\r
-          else\r
-            {\r
-              for (col=0; col<A->len_aln; col++)\r
-                {\r
-                  if (A->seq_al[i][col]==A->seq_al[j][col] && !is_gap(A->seq_al[i][col]))ident+=1;\r
-                  if (!is_gap(A->seq_al[i][col]) && !is_gap(A->seq_al[j][col]))paired+=1;\r
-                }\r
-              ident=ident/((paired==0)?1:paired);\r
-            }\r
-\r
-          W[i][j]=W[j][i]=(1-ident)*100;\r
-        }\r
-    }\r
-\r
-  return W;\r
-}\r
-\r
-/********************************************************************/\r
-/* */\r
-/* AMINO ACID FUNCTIONS */\r
-/* */\r
-/* */\r
-/* */\r
-/********************************************************************/\r
-/*\r
-  string2alphabet: build an extended alphabet from a string.\r
-\r
-  The distinct characters of string (in order of first appearance) become\r
-  one-letter symbols, followed by "*". When depth>1, for every length a in\r
-  2..depth the combinations returned by generate_array_string_list are\r
-  concatenated into a-letter symbols and appended, and a final "*" closes the\r
-  list.\r
-\r
-  falp_size[0] receives the number of symbols; the symbol array is returned.\r
-  depth<=1 returns the one-letter alphabet only.\r
-*/\r
-char** string2alphabet (char *string, int depth, int *falp_size)\r
-{\r
-  int max_s;\r
-  int a, b,c, l, n;\r
-  char buf[1000];\r
-  char **alp;\r
-  int alp_size;\r
-\r
-  char ***alp2;\r
-  int *alp2_size;\r
-\r
-  int *array;\r
-  char **falp;\r
-\r
-  l=strlen (string);\r
-\r
-  max_s=l+1;\r
-  falp_size[0]=0;\r
-  falp=declare_char (l+1, 2);\r
-\r
-  alp=declare_char(l,2);\r
-  alp_size=0;\r
-\r
-  /* array[ch] flags the characters already seen (allocated once). */\r
-  array=vcalloc ( 256, sizeof (int));\r
-  for (a=0;a<l; a++)\r
-    {\r
-      /* index through unsigned char so that bytes >127 stay inside the table */\r
-      int ch=(unsigned char)string[a];\r
-\r
-      if (!array[ch])\r
-        {\r
-          array[ch]=1;\r
-          sprintf (alp[alp_size++], "%c", string[a]);\r
-          sprintf (falp[falp_size[0]++], "%c", string[a]);\r
-        }\r
-    }\r
-  sprintf ( falp[falp_size[0]++], "*");\r
-  vfree (array);\r
-\r
-  /* Nothing to combine: also covers depth<=0, which would otherwise write past falp. */\r
-  if ( depth<=1)\r
-    {\r
-      free_char (alp, -1);\r
-      return falp;\r
-    }\r
-\r
-  /* Every position of a combination draws from the same one-letter alphabet. */\r
-  alp2=vcalloc ( depth, sizeof (char**));\r
-  alp2_size=vcalloc (depth, sizeof (int));\r
-\r
-  for (a=0; a<depth; a++)\r
-    {\r
-      alp2[a]=alp;\r
-      alp2_size[a]=alp_size;\r
-    }\r
-\r
-  for (a=2; a<=depth; a++)\r
-    {\r
-      char ***result_array;\r
-\r
-      result_array=generate_array_string_list (a, alp2, alp2_size, &n, NULL, NO_OVERLAP);\r
-      max_s+=n+1;\r
-      falp=vrealloc (falp, sizeof (char*)*max_s);\r
-      for (b=0; b<n; b++)\r
-        {\r
-          buf[0]='\0';\r
-          for (c=0; c<a; c++)\r
-            {\r
-              strcat (buf, result_array[b][c]);\r
-            }\r
-          falp[falp_size[0]]=vcalloc (strlen (buf)+1, sizeof (char));\r
-          sprintf ( falp[falp_size[0]++], "%s", buf);\r
-          vfree ( result_array[b]);\r
-        }\r
-      vfree (result_array);\r
-    }\r
-\r
-  falp[falp_size[0]]=vcalloc (2, sizeof (char));\r
-  sprintf ( falp[falp_size[0]++], "*");\r
-\r
-  /* alp2 only borrows alp: release the two index arrays, then alp itself. */\r
-  vfree (alp2);\r
-  vfree (alp2_size);\r
-  free_char (alp, -1);\r
-  return falp;\r
-}\r
-\r
-char** make_group_aa (int *ngroup, char *mode)\r
- {\r
-/*mode: indicates which matrix will be used for the grouping*/\r
-/*n_group: pointer to the number of groups */\r
-/*return value: an array of strings containing the AA of each group */\r
-\r
-\r
- int **matrix;\r
- int a, b,c,is_in;\r
- char buf[28];\r
- char **group_list;\r
- char *matrix_name;\r
- int extend=0;\r
- matrix_name=vcalloc ( 100, sizeof (char));\r
-\r
- if (ngroup[0]==-1)extend=1;\r
- \r
- ngroup[0]=0;\r
- group_list=declare_char ( 100, 27);\r
- \r
- if (extend)\r
- {\r
- sprintf ( group_list[ngroup[0]++], "gG");\r
- sprintf ( group_list[ngroup[0]++], "pP");\r
- sprintf ( group_list[ngroup[0]++], "aA");\r
- sprintf ( group_list[ngroup[0]++], "cC");\r
- sprintf ( group_list[ngroup[0]++], "dD");\r
- sprintf ( group_list[ngroup[0]++], "eE");\r
- \r
- sprintf ( group_list[ngroup[0]++], "fF"); \r
- sprintf ( group_list[ngroup[0]++], "hH");\r
- sprintf ( group_list[ngroup[0]++], "iI");\r
- sprintf ( group_list[ngroup[0]++], "kK");\r
- sprintf ( group_list[ngroup[0]++], "lL");\r
- sprintf ( group_list[ngroup[0]++], "mM");\r
- sprintf ( group_list[ngroup[0]++], "nN");\r
- sprintf ( group_list[ngroup[0]++], "qQ");\r
- sprintf ( group_list[ngroup[0]++], "rR");\r
- \r
- sprintf ( group_list[ngroup[0]++], "sS");\r
- sprintf ( group_list[ngroup[0]++], "tT");\r
- sprintf ( group_list[ngroup[0]++], "vV");\r
- sprintf ( group_list[ngroup[0]++], "wW");\r
- sprintf ( group_list[ngroup[0]++], "*");\r
- }\r
- \r
- if ( mode && mode[0]=='_'){mode++;sprintf ( matrix_name, "%s", mode);}\r
-\r
- if (mode==NULL || mode[0]=='\0')sprintf ( matrix_name, "idmat");\r
- else if ( strstr (mode, "sim") || strm (mode, "idmat") || mode==NULL)\r
- { \r
- sprintf ( group_list[ngroup[0]++], "aA");\r
- sprintf ( group_list[ngroup[0]++], "bB");\r
- sprintf ( group_list[ngroup[0]++], "cC");\r
- sprintf ( group_list[ngroup[0]++], "dD");\r
- sprintf ( group_list[ngroup[0]++], "eE");\r
- sprintf ( group_list[ngroup[0]++], "fF");\r
- sprintf ( group_list[ngroup[0]++], "gG");\r
- sprintf ( group_list[ngroup[0]++], "hH");\r
- sprintf ( group_list[ngroup[0]++], "iI");\r
- sprintf ( group_list[ngroup[0]++], "jJ");\r
- sprintf ( group_list[ngroup[0]++], "kK");\r
- sprintf ( group_list[ngroup[0]++], "lL");\r
- sprintf ( group_list[ngroup[0]++], "mM");\r
- sprintf ( group_list[ngroup[0]++], "nN");\r
- sprintf ( group_list[ngroup[0]++], "oO");\r
- sprintf ( group_list[ngroup[0]++], "pP");\r
- sprintf ( group_list[ngroup[0]++], "qQ");\r
- sprintf ( group_list[ngroup[0]++], "rR");\r
- sprintf ( group_list[ngroup[0]++], "sS");\r
- sprintf ( group_list[ngroup[0]++], "tT");\r
- sprintf ( group_list[ngroup[0]++], "uU");\r
- sprintf ( group_list[ngroup[0]++], "vV");\r
- sprintf ( group_list[ngroup[0]++], "wW");\r
- sprintf ( group_list[ngroup[0]++], "xX");\r
- sprintf ( group_list[ngroup[0]++], "yY");\r
- sprintf ( group_list[ngroup[0]++], "zZ");\r
- vfree (matrix_name);\r
- return group_list;\r
- }\r
- else if ( strm (mode, "simple"))\r
- {\r
- sprintf ( group_list[ngroup[0]++], "avilmAVILM");\r
- sprintf ( group_list[ngroup[0]++], "dekrDEKR");\r
- sprintf ( group_list[ngroup[0]++], "stcnqhSTCNQH");\r
- sprintf ( group_list[ngroup[0]++], "wfyWFY");\r
- sprintf ( group_list[ngroup[0]++], "gG");\r
- sprintf ( group_list[ngroup[0]++], "pP");\r
- vfree (matrix_name);\r
- return group_list;\r
- }\r
-\r
- else if ( strm (mode, "mafft"))\r
- {\r
- \r
- \r
- sprintf ( group_list[ngroup[0]++],"agjopstAGJOPST");\r
- sprintf ( group_list[ngroup[0]++],"ilmvILMV");\r
- sprintf ( group_list[ngroup[0]++],"bdenqzBDENQZ");\r
- sprintf ( group_list[ngroup[0]++],"hkrHKR");\r
- sprintf ( group_list[ngroup[0]++],"fwyFWY");\r
- sprintf ( group_list[ngroup[0]++],"cC");\r
- vfree (matrix_name);\r
- return group_list;\r
- }\r
- else if ( strm (mode, "clustalw"))\r
- {\r
- \r
- sprintf ( group_list[ngroup[0]++],"astaASTA");\r
- sprintf ( group_list[ngroup[0]++],"bneqkBNEQK");\r
- sprintf ( group_list[ngroup[0]++],"cnhqkCNHQK");\r
- sprintf ( group_list[ngroup[0]++],"dndeqDNDEQ");\r
- sprintf ( group_list[ngroup[0]++],"eqhrkEQHRK");\r
- sprintf ( group_list[ngroup[0]++],"fmilvFMILV");\r
- sprintf ( group_list[ngroup[0]++],"gmilfGMILF");\r
- sprintf ( group_list[ngroup[0]++],"hhyHHY");\r
- sprintf ( group_list[ngroup[0]++],"ifywIFYW");\r
- sprintf ( group_list[ngroup[0]++],"jcJC");\r
- sprintf ( group_list[ngroup[0]++],"kpKP");\r
- vfree (matrix_name);\r
- return group_list;\r
- }\r
- else if ( strm (mode, "polarity"))\r
- {\r
- \r
- sprintf ( group_list[ngroup[0]++],"eqrsdnkhtEQRSDNKHT");\r
- sprintf ( group_list[ngroup[0]++],"pP");\r
- sprintf ( group_list[ngroup[0]++],"gG");\r
- sprintf ( group_list[ngroup[0]++],"cC");\r
- sprintf ( group_list[ngroup[0]++],"fywFYW");\r
- sprintf ( group_list[ngroup[0]++],"iavlmIAVLM");\r
- vfree (matrix_name);\r
- return group_list;\r
- }\r
- else if ( strm (mode, "vasiliky"))\r
- {\r
- ngroup[0]=0;\r
- sprintf ( group_list[ngroup[0]++], "rkRK");\r
- sprintf ( group_list[ngroup[0]++], "deDE");\r
- sprintf ( group_list[ngroup[0]++], "qhQH");\r
- sprintf ( group_list[ngroup[0]++], "vilmVILM");\r
- sprintf ( group_list[ngroup[0]++], "fyFY");\r
- sprintf ( group_list[ngroup[0]++], "sS");\r
- sprintf ( group_list[ngroup[0]++], "wW");\r
- sprintf ( group_list[ngroup[0]++], "aA");\r
- sprintf ( group_list[ngroup[0]++], "cC");\r
- sprintf ( group_list[ngroup[0]++], "gG");\r
- sprintf ( group_list[ngroup[0]++], "nN");\r
- sprintf ( group_list[ngroup[0]++], "pP");\r
- sprintf ( group_list[ngroup[0]++], "tT");\r
- vfree (matrix_name);\r
- return group_list;\r
- }\r
- else if ( strm (mode, "clustalw_col"))\r
- {\r
- sprintf ( group_list[ngroup[0]++], "staSTA");\r
- sprintf ( group_list[ngroup[0]++], "neqkNEQK");\r
- sprintf ( group_list[ngroup[0]++], "nhqkNHQK");\r
- sprintf ( group_list[ngroup[0]++], "ndeqNDEQ");\r
- sprintf ( group_list[ngroup[0]++], "qhrkQHRK");\r
- sprintf ( group_list[ngroup[0]++], "milvMILV");\r
- sprintf ( group_list[ngroup[0]++], "milfMILF");\r
- sprintf ( group_list[ngroup[0]++], "hyHY");\r
- sprintf ( group_list[ngroup[0]++], "fywFYW");\r
- sprintf ( group_list[ngroup[0]++], "gG");\r
- sprintf ( group_list[ngroup[0]++], "pP");\r
- sprintf ( group_list[ngroup[0]++], "cC");\r
- vfree (matrix_name);\r
- \r
- return group_list;\r
- }\r
- else if ( strm (mode, "clustalw_dot"))\r
- {\r
- sprintf ( group_list[ngroup[0]++], "csaCSA");\r
- sprintf ( group_list[ngroup[0]++], "atvATV");\r
- sprintf ( group_list[ngroup[0]++], "sagSAG");\r
- sprintf ( group_list[ngroup[0]++], "stnkSTNK");\r
- sprintf ( group_list[ngroup[0]++], "stpaSTPA");\r
- sprintf ( group_list[ngroup[0]++], "sgndSGND");\r
- sprintf ( group_list[ngroup[0]++], "sndeqkSNDEQK");\r
- sprintf ( group_list[ngroup[0]++], "ndeqhkNDEQHK");\r
- sprintf ( group_list[ngroup[0]++], "neqhrkNEQHRK");\r
- sprintf ( group_list[ngroup[0]++], "fvlimFVLIM");\r
- sprintf ( group_list[ngroup[0]++], "hfyHFY");\r
- vfree (matrix_name);\r
- return group_list;\r
- }\r
- else if ( strm (mode, "make_all"))\r
- {\r
- ngroup[0]=1;\r
- sprintf ( group_list[0], "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz");\r
- vfree (matrix_name);\r
- return group_list;\r
- }\r
- else sprintf ( matrix_name, "%s", mode);\r
- \r
- matrix=read_matrice ( matrix_name); \r
- \r
- for ( a=0;a< 26; a++)\r
- {\r
- if ( matrix[a][a]>0)\r
- {\r
- for ( c=0,b=0;b< 26; b++)\r
- {\r
- \r
- if ( matrix[a][b]>0 && matrix[b][b]>0)\r
- {\r
- buf[c++]=b+'A';\r
- buf[c++]=b+'a';\r
- }\r
- }\r
- buf[c]='\0';\r
- for ( is_in=0,b=0; b< ngroup[0]; b++)if ( strcmp (buf, group_list[b])==0)is_in=1;\r
- if (is_in==0)sprintf ( group_list[ngroup[0]++], "%s", buf);\r
- \r
- }\r
- }\r
- free_int (matrix, -1); \r
- vfree (matrix_name);\r
- \r
- return group_list;\r
- }\r
-/*\r
-  make_group_aa_upgma: cluster the 26 lower-case letters into at most max_n\r
-  groups using the substitution matrix read from the file/name matrix.\r
-\r
-  Starting from 26 one-letter groups, the two live groups with the highest\r
-  score are merged repeatedly: the row/column of the first group becomes the\r
-  average of both, and the second group is retired. Merging stops when max_n\r
-  groups remain or when no pair is left to merge.\r
-\r
-  Returns an array of 27 rows; the surviving groups are packed at the front\r
-  and every row that no longer holds a group is NULL.\r
-*/\r
-char** make_group_aa_upgma (char*matrix, int max_n)\r
-  {\r
-    char **group_list;\r
-    int **mat;\r
-    int *used;\r
-    int a, b, ba, bb, best, set, l, n;\r
-    l=26;\r
-\r
-    group_list=declare_char (l+1, l+1);\r
-    for (a=0; a<l; a++)group_list[a][0]='a'+a;\r
-    mat=read_matrice(matrix);\r
-    used=vcalloc ( l, sizeof (int));\r
-    n=l;\r
-\r
-    while (n>max_n)\r
-      {\r
-        /* best scoring pair among the groups still alive */\r
-        ba=bb=-1;\r
-        best=0;\r
-        for (set=0,a=0; a<l-1; a++)\r
-          for (b=a+1; b<l; b++)\r
-            {\r
-              if (used[a]||used[b])continue;\r
-\r
-              if (set==0 || mat[a][b]>best)\r
-                {\r
-                  best=mat[a][b];\r
-                  ba=a;\r
-                  bb=b;\r
-                  set=1;\r
-                }\r
-            }\r
-\r
-        /* fewer than two live groups: nothing left to merge (max_n<1) */\r
-        if (!set)break;\r
-\r
-        for (a=0; a<l; a++)\r
-          {\r
-            mat[ba][a]=mat[a][ba]=(mat [ba][a]+mat[bb][a])/2;\r
-          }\r
-        used[bb]=1;\r
-\r
-        strcat (group_list[ba], group_list[bb]);\r
-        vfree (group_list[bb]);\r
-        group_list[bb]=NULL;\r
-\r
-        n--;\r
-      }\r
-\r
-    /* Pack the surviving groups at the front; clear the slot a group is moved from so that no string is referenced twice. */\r
-    for (n=0,a=0; a<l; a++)\r
-      {\r
-        if ( group_list[a])\r
-          {\r
-            if (n!=a)\r
-              {\r
-                group_list[n]=group_list[a];\r
-                group_list[a]=NULL;\r
-              }\r
-            n++;\r
-          }\r
-      }\r
-    vfree (used); free_int (mat, -1);\r
-    return group_list;\r
-  }\r
-\r
-/*\r
-  find_group_aa_distribution: count, for every group, how many of the nseq\r
-  residues of col belong to it and return the largest count.\r
-\r
-  gl / n_group : groups to use; when gl is NULL a default list is used. That\r
-                 default list is built by make_group_aa with the mode of the\r
-                 FIRST call and cached for all later calls.\r
-  distrib      : receives the per-group counts; when NULL an internal static\r
-                 buffer is used.\r
-\r
-  Returns 0 when there is no group.\r
-*/\r
-int find_group_aa_distribution (char *col, int nseq,int n_group, char **gl, int *distrib, char *mode )\r
-  {\r
-    static int *distribution;\r
-    static int distribution_size;\r
-    static char **lgl;\r
-    static int ln_group;\r
-    int a, b, c;\r
-    int *d;\r
-    char **gl2;\r
-    int n_group2;\r
-\r
-    if ( lgl==NULL)\r
-      lgl=make_group_aa ( &ln_group, mode);\r
-\r
-    if ( gl==NULL)\r
-      {\r
-        gl2=lgl;\r
-        n_group2=ln_group;\r
-      }\r
-    else\r
-      {\r
-        gl2=gl;\r
-        n_group2=n_group;\r
-      }\r
-\r
-    if ( n_group2<=0)return 0;\r
-\r
-    if ( distrib==NULL)\r
-      {\r
-        /* Grow the static scratch buffer whenever the current request does not fit. */\r
-        if ( distribution==NULL || distribution_size<n_group2)\r
-          {\r
-            if ( distribution)vfree (distribution);\r
-            distribution=vcalloc ( n_group2, sizeof (int));\r
-            distribution_size=n_group2;\r
-          }\r
-        d=distribution;\r
-      }\r
-    else d=distrib;\r
-\r
-    for ( a=0; a< n_group2; a++)d[a]=0;\r
-\r
-    for ( a=0; a< nseq; a++)\r
-      {\r
-        for ( b=0; b< n_group2; b++)\r
-          d[b]+=is_in_set (col[a], gl2[b]);\r
-      }\r
-    c=d[0];\r
-    for ( a=0; a< n_group2; a++)\r
-      c=(d[a]>c)?d[a]:c;\r
-    return c;\r
-  }\r
-\r
-\r
-\r
-/*\r
-  is_in_same_group_aa: return 1 when r1 and r2 (compared in upper case)\r
-  belong to a common group, 0 otherwise.\r
-\r
-  mode==NULL         : plain identity test.\r
-  mode=="clean"      : release the cached default groups and return 0.\r
-  mode contains "cov": always 1.\r
-  otherwise          : look the residues up in gl (n_group groups) or, when\r
-                       gl is NULL, in a default list built by make_group_aa\r
-                       with the mode of the first call and cached until\r
-                       "clean" is requested.\r
-\r
-  use mode=idmat for similarity based on id\r
-*/\r
-int is_in_same_group_aa ( char r1, char r2, int n_group, char **gl, char *mode)\r
-  {\r
-    int a;\r
-    static char **lgl;\r
-    static int ln_group;\r
-\r
-    char **gl2;\r
-    int n_group2;\r
-\r
-    /* <ctype.h> functions need a value representable as unsigned char */\r
-    r1=toupper((unsigned char)r1);\r
-    r2=toupper((unsigned char)r2);\r
-    if (mode==NULL)return (r1==r2)?1:0;\r
-\r
-    if ( strm (mode, "clean"))\r
-      {\r
-        if ( lgl)free_char (lgl, -1);\r
-        lgl=NULL;\r
-        ln_group=0;\r
-        return 0;\r
-      }\r
-    else if ( strstr (mode, "cov"))\r
-      {\r
-        return 1;\r
-      }\r
-\r
-    if ( lgl==NULL)\r
-      {\r
-        lgl=make_group_aa ( &ln_group, mode);\r
-      }\r
-\r
-    if ( gl==NULL)\r
-      {\r
-        gl2=lgl;\r
-        n_group2=ln_group;\r
-      }\r
-    else\r
-      {\r
-        gl2=gl;\r
-        n_group2=n_group;\r
-      }\r
-\r
-    for ( a=0; a< n_group2; a++)\r
-      if ( is_in_set ( r1, gl2[a]) && is_in_set ( r2, gl2[a]))return 1;\r
-    return 0;\r
-  }\r
- \r
-\r
-/* gene2prot: hands the alignment back untouched. */\r
-Alignment * gene2prot (Alignment *A)\r
-{\r
-  return A;\r
-}\r
-char * test_gene2prot (Constraint_list *CL, int s1)\r
- { /* test_gene2prot: decode a coding / non-coding labelling of sequence s1 of CL->S.\r
-      A per-residue potential is accumulated from the constraint list, then a\r
-      dynamic-programming pass over the states declared below keeps, for every\r
-      position and state, the best scoring previous state. The labelled sequence,\r
-      the number of coding residues and their translation are written to stderr.\r
-      Every path through the end of this function calls myexit (EXIT_SUCCESS);\r
-      the final return 0 is presumably never reached - confirm against myexit. */\r
- int a, b,q, nal;\r
- int F=-10000000; /*FORBIDEN STATE*/\r
- int AL=0; /*ALLOWED STATE*/\r
- int SPLICE_PENALTY=1000;\r
- int FRAME_PENALTY=1000;\r
- \r
-\r
- int START, ORF1, ORF2, ORF3, s5NC; \r
- int s3NC,ORF3_G1, ORF3_T2, ORF3_NC, ORF3_A3, ORF3_T4;\r
- int U1_G1, U1_T2, U1_NC, U1_A3, U1_T4;\r
- int U2_G1, U2_T2, U2_NC, U2_A3, U2_T4;\r
- int U1, U2, U3, U4, U5, END;\r
- \r
- int nstate=0;\r
- int **transitions;\r
- int **v_tab;\r
- int **v_tab_p;\r
- int **last_coding;\r
- int **last_t4;\r
- int *potential;\r
- int v;\r
-\r
- int orf1, orf2, orf3, ncp, p, state, pstate, e, best_state_p=0, best_state_v=0, best_pstate_p=0, best_pstate_v;\r
- char *seq, *seq2, *seq3;\r
- int l;\r
- int *is_coding;\r
- int *is_t4;\r
- char *codon;\r
-\r
- static int *entry;\r
- int tot=0;\r
- /* seq: ungapped, lower-cased copy of the sequence with 't' replaced by 'u' */\r
- seq=vcalloc ( strlen ((CL->S)->seq[s1])+1, sizeof (char));\r
- seq2=vcalloc ( strlen ((CL->S)->seq[s1])+1, sizeof (char));\r
- seq3=vcalloc ( strlen ((CL->S)->seq[s1])+1, sizeof (char));\r
- sprintf ( seq, "%s", (CL->S)->seq[s1]);\r
- ungap (seq);\r
-\r
- l=strlen (seq);\r
- for ( a=0; a< l; a++) seq[a]=tolower ( seq[a]);\r
- for ( a=0; a< l; a++) seq[a]=(seq[a]=='t')?'u': seq[a];\r
- /* potential[r]: sum of the weights (entry[WE]) of the constraint-list entries that involve residue r+1 of s1 */\r
-\r
- potential=vcalloc (l+1, sizeof (int));\r
- CL=index_constraint_list ( CL);\r
- for (nal=0, a=0; a<(CL->S)->nseq; a++)\r
- for ( b=CL->start_index[s1][a]; b< CL->end_index[s1][a];b++)\r
- {\r
- entry=extract_entry(entry, b, CL);\r
- if ( entry[SEQ1]==s1)potential[entry[R1]-1]+=entry[WE];\r
- else if ( entry[SEQ2]==s1)potential[entry[R2]-1]+=entry[WE];\r
- tot+=entry[WE];\r
- nal++;\r
- }\r
-\r
-\r
- SPLICE_PENALTY=10000; /* overrides the initial value of 1000 */\r
- FRAME_PENALTY=1000;\r
-\r
- /* Number the states: each name receives the next free index */\r
- nstate=0;\r
- START=nstate++; ORF1=nstate++; ORF2=nstate++; ORF3=nstate++; s5NC=nstate++; \r
- s3NC=nstate++;\r
- ORF3_G1=nstate++;U1_G1=nstate++;U2_G1=nstate++; \r
- ORF3_T2=nstate++;U1_T2=nstate++;U2_T2=nstate++;\r
- ORF3_NC=nstate++;U1_NC=nstate++;U2_NC=nstate++; \r
- ORF3_A3=nstate++;U1_A3=nstate++;U2_A3=nstate++; \r
- ORF3_T4=nstate++;U1_T4=nstate++;U2_T4=nstate++;\r
- \r
- \r
- U1=nstate++; U2=nstate++; U3=nstate++; U4=nstate++; U5=nstate++; \r
- END=nstate++;\r
- /* is_coding / is_t4 flag the states that update last_coding / last_t4 below */\r
- is_coding=vcalloc ( nstate, sizeof (int));\r
- is_coding[ORF1]=is_coding[ORF2]=is_coding[ORF3]=is_coding[U1]=is_coding[U2]=1;\r
- is_coding[U3]=is_coding[U4]=is_coding[U5]=1;\r
- \r
- is_t4=vcalloc ( nstate, sizeof (int));\r
- is_t4[ORF3_T4]=is_t4[U1_T4]=is_t4[U2_T4]=1;\r
- transitions=declare_int ( nstate, nstate);\r
- for (a=0; a< nstate; a++)\r
- for ( b=0; b< nstate; b++)transitions[a][b]=F;\r
- /* transitions[from][to]: every move is forbidden (F) unless listed here */\r
- transitions[START][ORF1]=AL;\r
- transitions[START][s5NC]=AL-FRAME_PENALTY;\r
- transitions[s5NC][s5NC]=AL;\r
-\r
- transitions[s5NC][ORF1]=AL-FRAME_PENALTY;\r
-\r
- transitions[ORF1][ORF2]=AL;\r
- transitions[ORF2][ORF3]=AL;\r
- transitions[ORF3][U1]=AL;\r
- transitions[ORF3][ORF1]=AL;\r
- transitions[ORF3][ORF3_G1]=AL-SPLICE_PENALTY;\r
- \r
- \r
- transitions[ORF3_G1][ORF3_T2]=AL;\r
- transitions[ORF3_T2][ORF3_NC]=AL;\r
- transitions[ORF3_NC][ORF3_NC]=AL;\r
- transitions[ORF3_NC][ORF3_A3]=AL;\r
- transitions[ORF3_A3][ORF3_T4]=AL;\r
- transitions[ORF3_T4][ORF1]=AL-SPLICE_PENALTY;\r
-\r
- transitions[U1][U2]=AL;\r
- transitions[U1][U1_G1]=AL-SPLICE_PENALTY;\r
- transitions[U1_G1][U1_T2]=AL;\r
- transitions[U1_T2][U1_NC]=AL;\r
- transitions[U1_NC][U1_NC]=AL;\r
- transitions[U1_NC][U1_A3]=AL;\r
- transitions[U1_A3][U1_T4]=AL;\r
- transitions[U1_T4][U3]=AL-SPLICE_PENALTY;\r
- transitions[U3][U4]=AL;\r
- transitions[U4][ORF1]=AL;\r
- \r
- transitions[U2][U2_G1]=AL-SPLICE_PENALTY;\r
- transitions[U2_G1][U2_T2]=AL;\r
- transitions[U2_T2][U2_NC]=AL;\r
- transitions[U2_NC][U2_NC]=AL;\r
- transitions[U2_NC][U2_A3]=AL;\r
- transitions[U2_A3][U2_T4]=AL;\r
- transitions[U2_T4][U5]=AL-SPLICE_PENALTY;\r
- transitions[U5][ORF1]=AL;\r
- \r
- transitions[ORF3][s3NC]=AL-FRAME_PENALTY;\r
- transitions[ORF3][END]=AL;\r
- transitions[s3NC][END]=AL;\r
-\r
- /* v_tab[p][state]: best score ending in state at position p; v_tab_p: the previous state that gave it */\r
- v_tab=declare_int ( l+1,nstate);\r
- v_tab_p=declare_int ( l+1,nstate);\r
- last_coding=declare_int ( l+1,nstate);\r
- last_t4=declare_int ( l+1,nstate);\r
-\r
- for (a=0; a< l; a++) potential[a]-=200;\r
-\r
- codon=vcalloc ( 4, sizeof (char));\r
- best_pstate_p=START;\r
- best_pstate_v=0;\r
- nal=0;\r
- for ( p=1; p<=l; p++)\r
- {\r
- if (translate_dna_codon (seq+(p-1), 'x')=='x' || p>(l-2))orf1=F;\r
- else orf1=potential[p-1];\r
-\r
- if (p<2 || translate_dna_codon (seq+(p-2), 'x')=='x' || p>(l-1))orf2=F;\r
- else orf2=potential[p-1];\r
-\r
- \r
- if (p<3 || translate_dna_codon (seq+(p-3), 'x')=='x' || p>l)orf3=F;\r
- else orf3=potential[p-1];\r
- /* ncp: score given to the remaining states at this position (see the final else below) */\r
- if ( best_int (3, 1, &a, orf1, orf2, orf3)!=F)ncp=-best_int (3, 1, &a, orf1, orf2, orf3);\r
- else ncp=1000;\r
- \r
- for ( state=0; state< nstate; state++)\r
- {\r
- /* e: score of being in state at position p */\r
- if ( state==ORF1)e=orf1;\r
- else if ( state==ORF2)e=orf2;\r
- else if ( state==ORF3)e=orf3;\r
- else if ( state>=U1 && state<=U3)\r
- {\r
- e=0;\r
- }\r
- else if ( state==U4)\r
- {\r
- codon[2]=seq[p-1];\r
- codon[1]=seq[last_coding[p-1][U3]-1];\r
- codon[0]=seq[last_coding[p-2][U1_T4]-1];\r
- if ( translate_dna_codon (codon, 'x')=='x')e=F;\r
- else e=0;\r
- }\r
- else if ( state==U5)\r
- {\r
- codon[2]=seq[p-1];\r
- codon[1]=seq[last_coding[p-1][U2_T4]-1];\r
- q=seq[last_coding[p-1][U2_T4]]; /* NOTE(review): q receives a character of seq and is then used as a position index - looks unintended, confirm */\r
- codon[0]=seq[last_coding[q-1][U1]-1];\r
- if ( translate_dna_codon (codon, 'x')=='x')e=F;\r
- else e=0;\r
- }\r
-\r
- else if (state>=ORF3_G1 && state<=U2_G1)e=(p<l-1 && seq[p-1]=='g' && seq[p]=='u')?ncp:F;\r
- else if ( state>=ORF3_T2 && state<=U2_T2)\r
- {\r
- e=(p>1 && seq[p-2]=='g' && seq[p-1]=='u')?ncp:F;\r
- }\r
- else if ( state>=ORF3_A3 && state<=U2_A3)e=(seq[p-1]=='a')?ncp:F;\r
- else if ( state>=ORF3_T4 && state<=U2_T4)e=(seq[p-1]=='u')?ncp:F;\r
- else e=ncp;\r
- /* keep the previous state giving the highest e + transition + previous score */\r
- for ( pstate=0; pstate<nstate; pstate++)\r
- {\r
- if (e==F || transitions[pstate][state]==F || v_tab[p-1][pstate]==F)v=F;\r
- else v=e+transitions[pstate][state]+v_tab[p-1][pstate];\r
- \r
- if ( pstate==0 || v>best_pstate_v)\r
- {best_pstate_v=v;best_pstate_p=pstate;}\r
- }\r
- v_tab[p][state]=best_pstate_v;\r
- v_tab_p[p][state]=best_pstate_p; \r
- \r
- if (!is_coding[state])last_coding[p][state]=last_coding[p-1][best_pstate_p];\r
- else if (is_coding[state])last_coding[p][state]=p;\r
- \r
- if (!is_t4[state])\r
- {\r
- if (is_coding[state] && last_t4[p-1][best_pstate_p]==0)last_t4[p][state]=p;\r
- else last_t4[p][state]=last_t4[p-1][best_pstate_p];\r
- }\r
- else if (is_t4[state])last_t4[p][state]=p;\r
- \r
- if (state==0 ||best_pstate_v>best_state_v ){best_state_p=state; best_state_v=best_pstate_v;}\r
- }\r
- }\r
- tot=0; /* traceback from the best state of the last position; seq is relabelled in place and coding residues are collected (reversed) in seq2 */\r
- for ( p=l; p>0; p--)\r
- {\r
- if ( best_state_p>=ORF1 && best_state_p<=ORF3){seq2[tot++]=tolower (seq[p-1]);}\r
- else if ( best_state_p>=U1 && best_state_p<=U5){seq2[tot++]=tolower (seq[p-1]);}\r
- if (best_state_p==ORF1)seq[p-1]=toupper (seq[p-1]);\r
- else if (best_state_p==ORF2 || best_state_p==ORF3)seq[p-1]=tolower (seq[p-1]);\r
- else if ( best_state_p==ORF3_NC || best_state_p==U1_NC || best_state_p==U2_NC) seq[p-1]='.';\r
- else if ( best_state_p==U1 || best_state_p==U2 || best_state_p==U3 || best_state_p==U4 || best_state_p==U5) seq[p-1]=best_state_p-U1+'1';\r
- else seq[p-1]=toupper (seq[p-1]);\r
- best_state_p=v_tab_p[p][best_state_p];\r
- }\r
-\r
- for ( a=0, b=tot-1; b>=0; b--, a++)\r
- seq3[a]=seq2[b];\r
- /* seq3 now holds the coding residues in sequence order; translate them codon by codon */\r
- fprintf ( stderr, "\n%s\n", seq);\r
- fprintf ( stderr, "\nN coding=%d\n", tot);\r
- for ( a=0; a< tot; a+=3)\r
- {\r
- b=translate_dna_codon (seq3+a, 'x');\r
- fprintf ( stderr, "%c",b);\r
- if ( b=='x'){fprintf ( stderr, "\n");myexit (EXIT_SUCCESS);}\r
- }\r
- \r
- fprintf ( stderr, "\n"); \r
- myexit (EXIT_SUCCESS);\r
- return 0;\r
- \r
- \r
- \r
- }\r
-Alignment * dna_aln2_3frame_cdna_aln(Alignment *A,int *ns,int **l_s)\r
-{\r
- Alignment *B;\r
- int a;\r
- B=realloc_aln2 (NULL,6,strlen(A->seq_al[l_s[0][0]])+strlen(A->seq_al[l_s[1][0]]));\r
- for ( a=0; a< 3; a++) \r
- { \r
- B->seq_al[a]=translate_dna_seq (A->seq_al[l_s[0][0]]+a, 0, 'o',B->seq_al[a]);\r
- B->seq_al[a+3]=translate_dna_seq (A->seq_al[l_s[1][0]]+a, 0, 'o',B->seq_al[a+3]);\r
- }\r
- for ( a=1; a<3; a++)\r
- {\r
- if ( strlen(B->seq_al[a])<strlen(B->seq_al[0])) B->seq_al[a]=strcat ( B->seq_al[a], "x");\r
- if ( strlen(B->seq_al[a+3])<strlen(B->seq_al[3])) B->seq_al[a+3]=strcat ( B->seq_al[a+3], "x");\r
- }\r
- \r
- B->nseq=6;\r
- B->len_aln=strlen (B->seq_al[0]);\r
- return B;\r
-}\r
-\r
-//JM_ADD\r
-//For normal distribution scan\r
-#ifndef PI\r
-#define PI 3.141592653589793238462643\r
-#endif\r
-\r
-double normal(double x, double mean, double std)\r
-{\r
- return (1/(std*sqrt(2.0*PI)))*exp((-0.5*(x-mean)*(x-mean))/(std*std));\r
-} \r
-\r
-int ** get_sim_aln_array_normal_distribution ( Alignment *A, char *mode, int *STD, int *CENTER)\r
- {\r
- int **w;\r
- int a, b;\r
- \r
-\r
- w=declare_int ( A->nseq, A->nseq);\r
- \r
- for ( a=0; a< A->nseq-1; a++)\r
- {\r
- for ( b=a+1; b< A->nseq; b++)\r
- {\r
-\r
- w[a][b]=w[b][a]=generic_get_seq_sim_normal_distribution ( A->seq_al[a], A->seq_al[b], (A->cdna_cache)?A->cdna_cache[0]:NULL, mode, STD, CENTER);\r
- }\r
- }\r
- return w;\r
- }\r
-int generic_get_seq_sim_normal_distribution ( char *seq1, char *seq2, int*cache, char *mode, int *STD, int *CENTER)\r
-{\r
- return get_seq_sim_distribution ( seq1,seq2,GAP_LIST, mode, STD, CENTER);\r
-}\r
-\r
-int get_seq_sim_distribution ( char *string1, char *string2, char *ignore, char *in_mode, int *STD, int *CENTER)\r
- {\r
- int len1;\r
- int a;\r
- int pos0, gap=0;\r
- int p1, p2;\r
- int r=0,r1=0,r2=0;\r
- char *p;\r
- char mode[1000];\r
-\r
- double sim;\r
-\r
- \r
- sprintf ( mode, "%s", in_mode);\r
- \r
- /*mode: <mat>__<sim_mode>\r
- mat: idscore to get the alignment done\r
- any legal cw matrix\r
- sim_mode: sim1->identities/matches\r
- sim2->identities/min len \r
- */\r
-\r
- \r
- if ( (p=strstr (mode, "_"))!=NULL)\r
- {\r
- p[0]='\0';\r
- p++;\r
- }\r
-\r
- \r
- if (strstr (mode, "idscore"))\r
- {\r
- static int **mat;\r
- if (!mat) mat=read_matrice ("blosum62mt");\r
- return idscore_pairseq (string1, string2, -12, -1, mat,mode);\r
- }\r
- \r
- len1=strlen (string1);\r
- for ( sim=pos0=0,a=0; a< len1; a++)\r
- {\r
- r1=string1[a];\r
- r2=string2[a];\r
- p1=1-is_in_set (r1, ignore);\r
- p2=1-is_in_set (r2, ignore);\r
- if (p1 && p2)\r
- {\r
- pos0++;\r
- if (is_in_same_group_aa(r1,r2,0, NULL, mode))\r
- { \r
- sim += normal(a, *CENTER, *STD);\r
- }\r
- }\r
- else if (p1+p2==1)\r
- {\r
- gap++;\r
- }\r
- }\r
- \r
- if ( p==NULL || strm (p, "sim1") || strm (p, "sim"))\r
- {\r
- r=(pos0==0)?0:(sim*MAXID);\r
- }\r
-/* else if ( strm (p, "sim2"))\r
- {\r
- r=(pos1==0 || pos2==0)?0:(sim*MAXID)/MIN(pos1,pos2);\r
- }\r
- else if ( strm (p, "sim3"))\r
- {\r
- r=(pos1==0 || pos2==0)?0:(sim*MAXID)/MAX(pos1,pos2);\r
- }\r
- else if ( strm (p, "gap1"))\r
- {\r
- r=(len1==0)?MAXID:(gap*MAXID)/len1;\r
- r=MAXID-r;\r
- }\r
- else if ( strm (p, "logid"))\r
- {\r
- r=logid_score (pos0, sim);\r
- }*/\r
- return r;\r
- \r
- } \r
-\r
-\r
-Alignment *aln2clean_pw_aln (Alignment *A, OveralnP *F)// char *mode, int t, int f, int p1,int p2, int p3, char *fsa_mode)\r
-{\r
- int **C, **T;\r
- int a, b, c;\r
- Alignment *B;\r
- \r
- \r
- if (F->t==0)F->t=2;\r
- \r
- C=declare_int ( A->nseq, A->len_aln);\r
- T=declare_int ( A->nseq, A->len_aln);\r
- B=copy_aln (A, NULL);\r
- \r
- for (a=0; a< A->nseq;a++)\r
- {\r
- for (b=0; b<A->nseq; b++)\r
- {\r
- int *w;\r
- w=pw_aln2clean_aln_weight (A->seq_al[a], A->seq_al[b], 1,F);//f,p1, p2, p3, fsa_mode);\r
- for (c=0; c<A->len_aln; c++)\r
- {\r
- if (A->seq_al[a][c]=='-')continue;\r
- C[a][c]+=w[c];\r
- T[a][c]++;\r
- }\r
- vfree (w);\r
- }\r
- }\r
- \r
- \r
- \r
- for (a=0; a<A->nseq; a++)\r
- {\r
- for (b=0; b<A->len_aln; b++)\r
- {\r
- int c;\r
- c=A->seq_al[a][b];\r
- if ( c=='-');\r
- else if (T[a][b]==0);\r
- else\r
- {\r
- int r;\r
- r=(C[a][b]*10)/T[a][b];\r
- r=(r==10)?9:r;\r
- if (!F->mode || strm (F->mode, "number"))\r
- B->seq_al[a][b]='0'+r;\r
- else if ( F->mode && strm (F->mode, "unalign"))\r
- B->seq_al[a][b]='0'+r;\r
- else if ( F->mode && strm (F->mode, "lower") )\r
- {\r
- if (r<=F->t)B->seq_al[a][b]=tolower (B->seq_al[a][b]);\r
- else B->seq_al[a][b]=toupper (B->seq_al[a][b]);\r
- }\r
- }\r
- }\r
- }\r
-\r
- if ( F->mode && strm (F->mode, "unalign"))\r
- {\r
- A=unalign_aln (A, B, F->t);\r
- free_aln (B);\r
- B=copy_aln (A, NULL);\r
- }\r
- \r
- free_int (C, -1);\r
- free_int (T, -1);\r
- \r
- return B;\r
-}\r
- \r
-char **pw_aln2clean_pw_aln_fsa1 (char ** aln, OveralnP *F);\r
-char **pw_aln2clean_pw_aln_fsa2 (char ** aln, OveralnP *F);\r
-\r
-int * pw_aln2clean_aln_weight ( char *seq1, char *seq2, int w, OveralnP *F)\r
-{\r
- char **aln;\r
- int *weight;\r
- int l, a;\r
- \r
- if ( (l=strlen (seq1)) !=strlen (seq2))\r
- {\r
- HERE ("\n%s\n%s\n", seq1, seq2);\r
- printf_exit ( EXIT_FAILURE, stderr, "\nERROR: Comparing unaligned sequences [FATAL:%s]", PROGRAM);\r
- \r
- }\r
-\r
- aln=declare_char (2, l+1);\r
- sprintf ( aln[0], "%s", seq1);\r
- sprintf ( aln[1], "%s", seq2);\r
- \r
- \r
- aln=pw_aln2clean_pw_aln (aln, F);\r
- \r
- weight=vcalloc (l+1, sizeof (int));\r
- for (a=0; a<l; a++)\r
- {\r
- if ( aln[0][a] || seq1[a]=='x' || seq1[a]=='X' || seq2[a]=='x' || seq2[a]=='X')weight[a]=w;\r
- }\r
- free_char (aln, -1);\r
- \r
- return weight;\r
-}\r
- \r
- \r
-char **pw_aln2clean_pw_aln (char ** aln, OveralnP *F)\r
-{\r
- \r
- if ( strm (F->model, "fsa2"))return pw_aln2clean_pw_aln_fsa2 (aln,F);\r
- else if ( strm (F->model, "fsa1"))return pw_aln2clean_pw_aln_fsa1 (aln,F);\r
- else return pw_aln2clean_pw_aln_fsa1 (aln,F);\r
-}\r
-\r
-char **pw_aln2clean_pw_aln_fsa2 (char ** aln, OveralnP *FO)\r
-{\r
- int a, b, c, d, l, id;\r
- int c1, c2, e0, e1,tb, obs;\r
- int T0, T1,T2;\r
- int **mat, **tran, **p, **t, *s, *ids;\r
- int ns, ps, cs;\r
- int S, M1, M2, m1, m2,B1, B2,G1,G2, K;\r
- int F=-9999999;\r
- int MID_EXON_FACTOR=50;\r
- int best;\r
- static int **smat; \r
- int model_type=1;\r
- int *translate;\r
-\r
- if ( getenv ("MID_EXON_FACTOR"))MID_EXON_FACTOR=atoi (getenv ("MID_EXON_FACTOR"));\r
- \r
- \r
- \r
- if (!smat)smat=read_matrice ( "blosum62mt");\r
- \r
- l=strlen (aln[0]);\r
- \r
- if ( l!=strlen (aln[1]))\r
- {\r
- printf_exit ( EXIT_FAILURE, stderr, "\nERROR: unaligned strings");\r
- }\r
- \r
- \r
- \r
- s=vcalloc (l, sizeof (int));\r
- ids=vcalloc (l, sizeof (int));\r
- \r
- //record the id level of each posotion\r
- for (b=0; b<l; b++)\r
- {\r
- c1=tolower(aln[0][b]);c2=tolower(c2=aln[1][b]);\r
- \r
- if (c1=='-' || c2=='-' || c1=='X' || c2=='X' || c1!=c2)ids[b]=0;\r
- else ids[b]=1;\r
- }\r
- \r
- //record the state of each position: M, m, T, gap\r
- for (id=0,b=0,a=0;a<l; a++)\r
- {\r
- c1=aln[0][a];c2=aln[1][a];\r
- if (islower (c1))s[a]=3;\r
- else if (c1=='-' || c2=='-' || c1=='X' || c2=='X')s[a]=2;\r
- else\r
- {\r
- int sc;\r
- sc=smat[c1-'A'][c2-'A'];\r
- if (sc>=2){id++; s[a]=1;}\r
- else {s[a]=0;}\r
- b++;\r
- }\r
- }\r
- \r
- if (b==0) \r
- {\r
- vfree(s);vfree (ids);\r
- return aln;\r
- }\r
- \r
- \r
- \r
- FO->p1=(FO->p1==0)?5:FO->p1;\r
- FO->p2=(FO->p2==0)?15:FO->p2;\r
- FO->p3=(FO->p3==0)?0:FO->p3;\r
- FO->p4=(FO->p4==0)?100:FO->p4;\r
- \r
- \r
- T1=100*(float)id/(float)b;\r
- T2=(FO->f==0)?30:T1*(float)((float)FO->f/(float)100);\r
- T2=MAX(T2,20);\r
- \r
- //0: unaligned\r
- //1: aligned\r
- //2: gap\r
- //3: exon boundary\r
- \r
- ns=0;\r
- S=ns++;\r
- M1=ns++;//1 matched aligned \r
- m1=ns++;//2 mmatched aligned\r
- M2=ns++;//3 matched unaligned\r
- m2=ns++;//4 mmatched unaligned\r
- B1=ns++;//5 transition aligned\r
- B2=ns++;//6 transition unaligned\r
- \r
- mat=declare_int (ns, 4);\r
- tran=declare_int (ns, ns);\r
- p=declare_int (l+1, ns);\r
- t=declare_int (l+1, ns);\r
- \r
- //emission Values\r
- mat[M1][0]=F; //non id\r
- mat[M1][1]=T1;//id\r
- mat[M1][2]=0; //gap\r
- mat[M1][3]=F; //transition\r
- \r
- mat[M2][0]=F;\r
- mat[M2][1]=T2;\r
- mat[M2][2]=0;\r
- mat[M2][3]=F;\r
- \r
- mat[m1][0]=100-T1;\r
- mat[m1][1]=F;\r
- mat[m1][2]=0;\r
- mat[m1][3]=F;\r
- \r
- mat[m2][0]=100-T2;\r
- mat[m2][1]=F;\r
- mat[m2][2]=0;\r
- mat[m1][3]=F;\r
- \r
- mat[B1][0]=F;\r
- mat[B1][1]=F;\r
- mat[B1][2]=F;\r
- mat[B1][3]=0;\r
- \r
- mat[B2][0]=F;\r
- mat[B2][1]=F;\r
- mat[B2][2]=F;\r
- mat[B2][3]=0;\r
- \r
- //transition values\r
- tran[S][m1]=0;\r
- tran[S][m2]=0;\r
- tran[S][M1]=0;\r
- tran[S][M2]=0;\r
- tran[S][B1]=0;\r
- tran[S][B2]=0;\r
- \r
- \r
- tran[M1][m1]= 0;\r
- tran[M1][m2]=-FO->p4;\r
- tran[M1][M1]=+FO->p2;\r
- tran[M1][M2]= F;\r
- tran[M1][S ]= F;\r
- tran[M1][B1]= 0;\r
- tran[M1][B2]=-FO->p1;\r
- \r
- tran[M2][m1]= F;\r
- tran[M2][m2]=+FO->p3;\r
- tran[M2][M1]= F;\r
- tran[M2][M2]= 0;\r
- tran[M2][S] = F;\r
- tran[M2][B1]= F;\r
- tran[M2][B2]= 0;\r
- \r
- \r
- tran[m1][m1]= 0;\r
- tran[m1][m2]= F;\r
- tran[m1][M1]= 0;\r
- tran[m1][M2]= F;\r
- tran[m1][S] = F;\r
- tran[m1][B1]= 0;\r
- tran[m1][B2]=-FO->p1;\r
- \r
- tran[m2][m1]= F;\r
- tran[m2][m2]= 0;\r
- tran[m2][M1]= -FO->p4;\r
- tran[m2][M2]= +FO->p3;\r
- tran[m2][S] = F;\r
- tran[m2][B1]= F;\r
- tran[m2][B2]= 0;\r
- \r
- tran[B1][m1]= 0;\r
- tran[B1][m2]= F;\r
- tran[B1][M1]= 0;\r
- tran[B1][M2]= F;\r
- tran[B1][S]= F;\r
- tran[B1][B1]= F;\r
- tran[B1][B2]= F;\r
- \r
- tran[B2][m1]= -FO->p1;\r
- tran[B2][m2]= 0;\r
- tran[B2][M1]= -FO->p1;\r
- tran[B2][M2]= 0;\r
- tran[B2][S]= F;\r
- tran[B2][B1]= F;\r
- tran[B2][B2]= F;\r
- \r
- translate=vcalloc (ns, sizeof (int));\r
- translate[M1]=1;\r
- translate[m1]=1;\r
- translate[M2]=0;\r
- translate[m2]=0;\r
- translate[B1]=1;\r
- translate[B2]=0;\r
- \r
- for (a=1;a<=l; a++)\r
- {\r
- obs=s[a-1];\r
- \r
- for (cs=0; cs<ns; cs++)\r
- {\r
- for (ps=0; ps<ns; ps++)\r
- {\r
- c=p[a-1][ps]+mat[cs][obs]+tran[ps][cs];\r
- if (ps==0 || c>=best){t[a][cs]=ps;best=p[a][cs]=c;}\r
- }\r
- \r
- }\r
- }\r
- \r
- \r
- for (a=0; a<ns; a++)\r
- {\r
- if (a==0 || p[l][a]>=best){tb=a;best=p[l][a];}\r
- }\r
- \r
- for (a=l; a>0; a--)\r
- {\r
- int v;\r
- int p2;\r
- \r
- p2=a-1;\r
- aln[0][p2]=aln[1][p2]=translate[tb];\r
- tb=t[a][tb];\r
- \r
- }\r
- \r
- free_int (p, -1);\r
- vfree(s);\r
- free_int (t, -1);\r
- free_int (mat, -1);\r
- free_int (tran, -1);\r
- vfree (translate);\r
- return aln;\r
-}\r
-char **pw_aln2clean_pw_aln_fsa1 (char ** aln, OveralnP *FO)\r
-{\r
- int a, b, c, d, l, id;\r
- int c1, c2, e0, e1,tb, obs;\r
- int T0, T1,T2;\r
- int **mat, **tran, **p, **t, **s;\r
- int ns, ps, cs;\r
- int S, M1, M2, m1, m2, K;\r
- int F=-9999999;\r
- int best;\r
- static int **smat; \r
- int *translate;\r
-\r
- \r
- if (!smat)smat=read_matrice ( "blosum62mt");\r
- \r
- l=strlen (aln[0]);\r
- \r
- if ( l!=strlen (aln[1]))\r
- {\r
- printf_exit ( EXIT_FAILURE, stderr, "\nERROR: unaligned strings");\r
- }\r
- \r
- \r
- s=declare_int (l+1, 2);\r
- for (id=0,b=0,a=0;a<l; a++)\r
- {\r
- c1=aln[0][a];c2=aln[1][a];\r
- \r
- if ( c1=='-' || c2=='-' || c1=='x' || c1=='X' || c2=='x' || c2=='X')continue;\r
- else \r
- {\r
- int sc;\r
- sc=smat[c1-'A'][c2-'A'];\r
- if (sc>=2){id++; s[b][0]=1;}\r
- else {s[b][0]=0;}\r
- s[b][1]=a;\r
- b++;\r
- \r
- }\r
- }\r
- if (b==0) \r
- {\r
- free_int (s, -1);\r
- return aln;\r
- }\r
- FO->f=(FO->f==0)?30:FO->f;\r
- FO->p1=(FO->p1==0)?90:FO->p1;\r
- FO->p2=(FO->p2==0)?15:FO->p2;\r
- FO->p3=(FO->p3==0)?0:FO->p3;\r
-\r
- l=b;//length of the ungapped aln\r
- T1=100*(float)id/(float)b;\r
- T2=FO->f;//T1*f;\r
- \r
- \r
- \r
- //0: unaligned\r
- //1: aligned\r
- \r
- \r
- ns=0;\r
- S=ns++;\r
- M1=ns++;//1 matched aligned \r
- m1=ns++;//2 mmatched aligned\r
- M2=ns++;//3 matched unaligned\r
- m2=ns++;//4 mmatched unaligned\r
- \r
- mat=declare_int (ns, 2);\r
- tran=declare_int (ns, ns);\r
- p=declare_int (l+1, ns);\r
- t=declare_int (l+1, ns);\r
- \r
- \r
- mat[M1][0]=F;\r
- mat[M1][1]=T1;\r
- \r
- mat[M2][0]=F;\r
- mat[M2][1]=T2;\r
- \r
- mat[m1][0]=100-T1;\r
- mat[m1][1]=F;\r
- \r
- mat[m2][0]=100-T2;\r
- mat[m2][1]=F;\r
- \r
- \r
- tran[S][m1]=0;\r
- tran[S][m2]=0;\r
- tran[S][M1]=0;\r
- tran[S][M2]=0;\r
- \r
- \r
- tran[M1][m1]= 0;\r
- tran[M1][m2]=-FO->p1;// -P;\r
- tran[M1][M1]=+FO->p2;\r
- tran[M1][M2]= F;\r
- tran[M1][S] = F;\r
- \r
- tran[M2][m1]= F;\r
- tran[M2][m2]=+FO->p3;\r
- tran[M2][M1]= F;\r
- tran[M2][M2]= 0;\r
- tran[M2][S]= F;\r
- \r
- tran[m1][m1]= 0;\r
- tran[m1][m2]= F;\r
- tran[m1][M1]= 0;\r
- tran[m1][M2]= F;\r
- tran[m1][S]= F;\r
- \r
- tran[m2][m1]= F;\r
- tran[m2][m2]= 0;\r
- tran[m2][M1]=-FO->p1;\r
- tran[m2][M2]=+FO->p3;\r
- tran[m2][S]= F;\r
- \r
- translate=vcalloc (ns, sizeof (int));\r
- translate[M1]=1;\r
- translate[m1]=1;\r
- translate[M2]=0;\r
- translate[m2]=0;\r
- translate[S]=1;\r
- \r
- \r
- for (a=1;a<=l; a++)\r
- {\r
- obs=s[a-1][0];\r
- \r
- for (cs=0; cs<ns; cs++)\r
- {\r
- for (ps=0; ps<ns; ps++)\r
- {\r
- c=p[a-1][ps]+mat[cs][obs]+tran[ps][cs];\r
- if (ps==0 || c>=best){t[a][cs]=ps;best=p[a][cs]=c;}\r
- }\r
- \r
- }\r
- }\r
- \r
- \r
- for (a=0; a<ns; a++)\r
- {\r
- if (a==0 || p[l][a]>=best){tb=a;best=p[l][a];}\r
- }\r
- for (a=l; a>0; a--)\r
- {\r
- int p2=s[a-1][1];\r
- aln[0][p2]=aln[1][p2]=translate[tb];\r
- \r
- tb=t[a][tb];\r
- }\r
- \r
- \r
- free_int (p, -1);\r
- free_int (s, -1);\r
- free_int (t, -1);\r
- free_int (mat, -1);\r
- free_int (tran, -1);\r
- vfree (translate);\r
- return aln;\r
-}\r
-float* analyze_overaln ( Alignment *iA, Alignment *iB, char *mode, int filter, int f, int p1,int p2, int p3)\r
-{\r
- Alignment *C, *D;\r
- Alignment *A, *B;\r
- OveralnP *F;\r
-\r
- F=vcalloc (1, sizeof (OveralnP));\r
- F->p1=p1;\r
- F->p2=p2;\r
- F->p3=p3;\r
- F->f=f;\r
- F->t=filter;\r
- sprintf (F->mode, "%s", mode);\r
- \r
- \r
- float *r;\r
- A=copy_aln (iA, NULL);\r
- B=copy_aln (iB, NULL);\r
- \r
- C=aln2gap_cache (A,0);\r
- A=filter_aln_upper_lower (A, C, 0, 0);\r
- D=aln2clean_pw_aln (B, F);\r
- r=aln2pred (A,D,mode);\r
- free_aln (C);\r
- free_aln (D);\r
- free_aln (A);\r
- free_aln (B);\r
- return r;\r
-}\r
-float* aln2pred ( Alignment *A, Alignment*B, char *mode)\r
-{\r
- int a, b, c, d, i, l, salp, s, n;\r
- static char **list, *buf1, *buf2, *alp, *alp_lu;\r
- static int ***r;\r
- int T, N;\r
- int fp, fn, tn, tp;\r
- int tfp, tfn, ttn, ttp;\r
- float sp, sn, sen2, best, result;\r
- int print=1;\r
- float *fresult;\r
-\r
- fresult=vcalloc ( 3, sizeof (float));\r
- \r
- if ( mode && strstr (mode, "case"))\r
- {\r
- A=aln2case_aln (A,"u","l");\r
- B=aln2case_aln (B,"u","l");\r
- }\r
- \r
- if (mode && strstr (mode, "printaln"))\r
- {\r
- Sequence *S;\r
- Alignment *C;\r
- S=aln2seq (A);\r
- C=copy_aln (B, NULL);\r
- for (a=0; a<B->nseq; a++)\r
- {\r
- i=name_is_in_list (C->name[a], S->name, S->nseq, 100);\r
- if ( i==-1)\r
- for (b=0; b<C->len_aln; b++) C->seq_al[a][b]='-';\r
- else\r
- for (d=0,b=0; b<C->len_aln; b++)\r
- {\r
- if ( !is_gap (C->seq_al[a][b]))\r
- {\r
- if (C->seq_al[a][b]==S->seq[i][d])C->seq_al[a][b]=toupper(C->seq_al[a][b]);\r
- d++;\r
- }\r
- }\r
- }\r
- print_aln (C);\r
- }\r
- \r
- vfree (alp);vfree (alp_lu);\r
- alp=vcalloc ( 256, sizeof (char));\r
- alp_lu=vcalloc ( 256, sizeof (char));\r
-\r
- for (c=0; c<2; c++)\r
- {\r
- Alignment *AL;\r
- AL=(c==0)?A:B;\r
- for (salp=0,a=0; a<AL->nseq; a++)\r
- {\r
- for (b=0; b<AL->len_aln; b++)\r
- {\r
- c=AL->seq_al[a][b];\r
- if (!is_gap(c) && !alp[c])\r
- {\r
- salp++;\r
- alp_lu[salp]=c;\r
- alp[c]=salp;\r
- }\r
- }\r
- }\r
- }\r
- \r
- vfree (buf1); vfree(buf2);\r
- buf1=vcalloc ( A->len_aln+1, sizeof (char));\r
- buf2=vcalloc ( B->len_aln+1, sizeof (char));\r
- \r
- free_arrayN ((void **)r, 3);\r
- r=declare_arrayN(3, sizeof (int),A->nseq,salp+1,salp+1);\r
- free_char ( list, -1);\r
- list=declare_char ( A->nseq, 100);\r
- for (n=0,a=0; a< A->nseq; a++)\r
- {\r
- for ( b=0; b<B->nseq; b++)\r
- {\r
- if ( strm (A->name[a], B->name[b]))\r
- {\r
- sprintf ( buf1, "%s", A->seq_al[a]);\r
- sprintf ( buf2, "%s", B->seq_al[b]);\r
- ungap (buf1); ungap (buf2);\r
- if ((l=strlen (buf1))!=strlen (buf2))continue;\r
- else\r
- {\r
- sprintf ( list[n], "%s", A->name[a]);\r
- for (c=0; c<l; c++)\r
- {\r
- int c1, c2;\r
- c1=buf1[c];\r
- c2=buf2[c];\r
- r[n][alp[c1]][alp[c2]]++;\r
- }\r
- n++;\r
- }\r
- }\r
- }\r
- }\r
- \r
-\r
- \r
- for ( s=1; s<=salp; s++)\r
- {\r
- char type[4];\r
- sprintf (type, "_%c_", alp_lu[s]);\r
- ttp=ttn=tfp=tfn=0;\r
- for (a=0; a<n; a++)\r
- {\r
- tp=tn=fp=fn=0;\r
- for (b=1; b<=salp; b++)\r
- {\r
- for (c=1; c<=salp; c++)\r
- {\r
- if ( b==s && c==s) tp+=r[a][b][c];\r
- else if ( b==s && c!=s)fn+=r[a][b][c];\r
- else if ( b!=s && c==s)fp+=r[a][b][c];\r
- else if ( b!=s && b!=s)tn+=r[a][b][c];\r
- }\r
- \r
- }\r
- \r
- ttp+=tp;\r
- ttn+=tn;\r
- tfp+=fp;\r
- tfn+=fn;\r
- rates2sensitivity (tp, tn, fp, fn, &sp, &sn, &sen2, &best);\r
- if ( mode && strstr (mode, "printstat"))fprintf ( stdout, ">%s S=%c sp=%6.2f sn=%6.2f sen2=%6.2f best=%6.2f\n", list[a],alp_lu[s],sp, sn, sen2, best);\r
- }\r
- \r
- rates2sensitivity (ttp, ttn, tfp, tfn, &sp, &sn, &sen2, &best);\r
- if (mode && strstr (mode, "printstat"))fprintf ( stdout, ">TOT S=%c sp=%6.2f sn=%6.2f re=%6.2f best=%6.2f\n", alp_lu[s],sp, sn, sen2, best);\r
- \r
- if ( mode && strstr (mode, type))\r
- {\r
- fresult[0]=sn;\r
- fresult[1]=sp;\r
- fresult[2]=sen2;\r
- }\r
- }\r
- return fresult;\r
-}\r
-\r
-Alignment * mark_exon_boundaries (Alignment *A, Alignment *E)\r
-{\r
- char *buf, *buf2;\r
- int a, b, c, i, l;\r
- \r
- buf2=vcalloc ( E->len_aln+1, sizeof (char));\r
- buf =vcalloc ( E->len_aln+1, sizeof (char));\r
- \r
- for (a=0; a< A->nseq; a++)\r
- {\r
- i=name_is_in_list (A->name[a], E->name, E->nseq, 100);\r
- if ( i==-1) continue;\r
- sprintf (buf, "%s", E->seq_al[i]);\r
- ungap (buf);\r
- l=strlen (buf);\r
- //clean buf2\r
- for (c=0, b=0; b<l; b++)if (buf[b]!='o' && buf[b]!='b' && buf[b]!='j')buf2[c++]=toupper(buf[b]);\r
- buf2[c]='\0';\r
- \r
- //lowercase the boundaries of buf2;\r
- for ( c=0,b=0; b<l; b++)\r
- {\r
- //ENSEMBL: o: 0, b:1 j:2\r
- if (buf[b]=='b' || buf[b]=='o' && c>=1)buf2[c-1]=tolower(buf2[c-1]);\r
- else if (buf[b]=='j' &&c<l)buf2[c+1]=tolower(buf2[c+1]);\r
- else c++;\r
- }\r
- \r
- for (c=0,b=0; b<A->len_aln; b++)\r
- {\r
- if (!is_gap(A->seq_al[a][b]))\r
- {\r
- A->seq_al[a][b]=buf2[c++];\r
- }\r
- }\r
- }\r
- vfree (buf);\r
- vfree (buf2);\r
- return A;\r
-}\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-\r
-/*********************************COPYRIGHT NOTICE**********************************/
-/*© Centro de Regulacio Genomica */
-/*and */
-/*Cedric Notredame */
-/*Tue Oct 27 10:12:26 WEST 2009. */
-/*All rights reserved.*/
-/*This file is part of T-COFFEE.*/
-/**/
-/* T-COFFEE is free software; you can redistribute it and/or modify*/
-/* it under the terms of the GNU General Public License as published by*/
-/* the Free Software Foundation; either version 2 of the License, or*/
-/* (at your option) any later version.*/
-/**/
-/* T-COFFEE is distributed in the hope that it will be useful,*/
-/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/
-/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/
-/* GNU General Public License for more details.*/
-/**/
-/* You should have received a copy of the GNU General Public License*/
-/* along with T-COFFEE; if not, write to the Free Software*/
-/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/
-/*............................................... |*/
-/* If you need some more information*/
-/* cedric.notredame@europe.com*/
-/*............................................... |*/
-/**/
-/**/
-/* */
-/*********************************COPYRIGHT NOTICE**********************************/