#include #include #include #include #include #include #include "io_lib_header.h" #include "util_lib_header.h" #include "define_header.h" #include "dp_lib_header.h" void print_atom ( Atom*A); float **** quantile_apdb_filtration ( Alignment *A, float ****residues, Constraint_list *CL,Pdb_param *PP, FILE *fp); float **** irmsdmin_apdb_filtration ( Alignment *A, float ****residues, Constraint_list *CL,Pdb_param *PP, FILE *fp); int apdb ( int argc, char *argv[]) { Constraint_list *CL=NULL; Sequence *S=NULL; Alignment *A=NULL; Alignment *EA=NULL; Pdb_param *pdb_param; Fname *F=NULL; char *file_name; int a,c; int n_pdb; /*PARAMETERS VARIABLES*/ int garbage; char *parameters; FILE *fp_parameters; int quiet; char *se_name; FILE *le=NULL; char **list_file; int n_list; char **struc_to_use; int n_struc_to_use; char *aln; char *repeat_seq; char *repeat_pdb; char *color_mode; char *comparison_io; int n_excluded_nb; float maximum_distance; float similarity_threshold; float md_threshold; int print_rapdb; char *outfile; char *run_name; char *apdb_outfile; char *cache; char **out_aln_format; int n_out_aln_format; char *output_res_num; char *local_mode; float filter; int filter_aln; int irmsd_graph; int nirmsd_graph; int n_template_file; char **template_file_list; char *mode; int prot_min_sim; int prot_max_sim; int prot_min_cov; int pdb_min_sim; int pdb_max_sim; int pdb_min_cov; int gapped; char *prot_blast_server; char *pdb_blast_server; char *pdb_db; char *prot_db; int min_ncol; argv=standard_initialisation (argv, &argc); /*PARAMETER PROTOTYPE: READ PARAMETER FILE */ declare_name (parameters); get_cl_param(\ /*argc*/ argc ,\ /*argv*/ argv ,\ /*output*/ &le ,\ /*Name*/ "-parameters" ,\ /*Flag*/ &garbage ,\ /*TYPE*/ "R_F" ,\ /*OPTIONAL?*/ OPTIONAL ,\ /*MAX Nval*/ 1 ,\ /*DOC*/ "Read the files in the parameter file" ,\ /*Parameter*/ ¶meters ,\ /*Def 1*/ "NULL" ,\ /*Def 2*/ "stdin" ,\ /*Min_value*/ "any" ,\ /*Max Value*/ "any" \ ); if ( parameters && parameters[0]) { argv[argc]=vcalloc ( VERY_LONG_STRING, sizeof(char)); a=0; fp_parameters=vfopen (parameters, "r"); while ((c=fgetc (fp_parameters))!=EOF)argv[1][a++]=c; vfclose (fp_parameters); argv[argc][a]='\0'; argc++; argv=break_list ( argv, &argc, "=:;, \n"); } /*PARAMETER PROTOTYPE*/ declare_name (se_name); get_cl_param(\ /*argc*/ argc ,\ /*argv*/ argv ,\ /*output*/ &le ,\ /*Name*/ "-quiet" ,\ /*Flag*/ &quiet ,\ /*TYPE*/ "W_F" ,\ /*OPTIONAL?*/ OPTIONAL ,\ /*MAX Nval*/ 1 ,\ /*DOC*/ "ND" ,\ /*Parameter*/ &se_name ,\ /*Def 1*/ "stderr" ,\ /*Def 2*/ "/dev/null" ,\ /*Min_value*/ "any" ,\ /*Max Value*/ "any" \ ); le=vfopen ( se_name, "w"); fprintf ( le, "\nPROGRAM: %s\n",argv[0]); /*PARAMETER PROTOTYPE: IN */ list_file=declare_char ( 200, STRING); n_list=get_cl_param(\ /*argc*/ argc ,\ /*argv*/ argv ,\ /*output*/ &le ,\ /*Name*/ "-in" ,\ /*Flag*/ &garbage ,\ /*TYPE*/ "S" ,\ /*OPTIONAL?*/ OPTIONAL ,\ /*MAX Nval*/ 200 ,\ /*DOC*/ "ND" ,\ /*Parameter*/ list_file ,\ /*Def 1*/ "",\ /*Def 2*/ "stdin" ,\ /*Min_value*/ "any" ,\ /*Max Value*/ "any" \ ); /*PARAMETER PROTOTYPE: IN */ struc_to_use=declare_char ( 200, STRING); n_struc_to_use=get_cl_param(\ /*argc*/ argc ,\ /*argv*/ argv ,\ /*output*/ &le ,\ /*Name*/ "-struc_to_use" ,\ /*Flag*/ &garbage ,\ /*TYPE*/ "S" ,\ /*OPTIONAL?*/ OPTIONAL ,\ /*MAX Nval*/ 200 ,\ /*DOC*/ "ND" ,\ /*Parameter*/ struc_to_use ,\ /*Def 1*/ "",\ /*Def 2*/ "stdin" ,\ /*Min_value*/ "any" ,\ /*Max Value*/ "any" \ ); /*PARAMETER PROTOTYPE: COMPARISON IO */ declare_name (comparison_io); get_cl_param(\ /*argc*/ argc ,\ /*argv*/ argv ,\ /*output*/ &le ,\ /*Name*/ "-io_format" ,\ /*Flag*/ &garbage ,\ /*TYPE*/ "S" ,\ /*OPTIONAL?*/ OPTIONAL ,\ /*MAX Nval*/ 200 ,\ /*DOC*/ "ND" ,\ /*Parameter*/ &comparison_io,\ /*Def 1*/ "hsgd0123456",\ /*Def 2*/ "" ,\ /*Min_value*/ "any" ,\ /*Max Value*/ "any" \ ); /*PARAMETER PROTOTYPE: ALN */ declare_name (aln); get_cl_param(\ /*argc*/ argc ,\ /*argv*/ argv ,\ /*output*/ &le ,\ /*Name*/ "-aln" ,\ /*Flag*/ &garbage ,\ /*TYPE*/ "S" ,\ /*OPTIONAL?*/ OPTIONAL ,\ /*MAX Nval*/ 1 ,\ /*DOC*/ "ND" ,\ /*Parameter*/ &aln,\ /*Def 1*/ "",\ /*Def 2*/ "stdin" ,\ /*Min_value*/ "any" ,\ /*Max Value*/ "any" \ ); /*PARAMETER PROTOTYPE: ALN */ declare_name (repeat_seq); get_cl_param(\ /*argc*/ argc ,\ /*argv*/ argv ,\ /*output*/ &le ,\ /*Name*/ "-repeat_seq" ,\ /*Flag*/ &garbage ,\ /*TYPE*/ "S" ,\ /*OPTIONAL?*/ OPTIONAL ,\ /*MAX Nval*/ 1 ,\ /*DOC*/ "ND" ,\ /*Parameter*/ &repeat_seq,\ /*Def 1*/ "",\ /*Def 2*/ "stdin" ,\ /*Min_value*/ "any" ,\ /*Max Value*/ "any" \ ); /*PARAMETER PROTOTYPE: ALN */ declare_name (repeat_pdb); get_cl_param(\ /*argc*/ argc ,\ /*argv*/ argv ,\ /*output*/ &le ,\ /*Name*/ "-repeat_pdb" ,\ /*Flag*/ &garbage ,\ /*TYPE*/ "S" ,\ /*OPTIONAL?*/ OPTIONAL ,\ /*MAX Nval*/ 1 ,\ /*DOC*/ "ND" ,\ /*Parameter*/ &repeat_pdb,\ /*Def 1*/ "",\ /*Def 2*/ "stdin" ,\ /*Min_value*/ "any" ,\ /*Max Value*/ "any" \ ); /*PARAMETER PROTOTYPE: Nb to exclude */ get_cl_param(\ /*argc*/ argc ,\ /*argv*/ argv ,\ /*output*/ &le ,\ /*Name*/ "-n_excluded_nb" ,\ /*Flag*/ &garbage ,\ /*TYPE*/ "D" ,\ /*OPTIONAL?*/ OPTIONAL ,\ /*MAX Nval*/ 1 ,\ /*DOC*/ "Exclude the N Nb on each side of the central residue. -1 triggers an automatic setting equal to the window size corresponding to the sphere" ,\ /*Parameter*/ &n_excluded_nb ,\ /*Def 1*/ "-1" ,\ /*Def 2*/ "1" ,\ /*Min_value*/ "any" ,\ /*Max Value*/ "any" \ ); /*PARAMETER PROTOTYPE: diatances to count */ get_cl_param(\ /*argc*/ argc ,\ /*argv*/ argv ,\ /*output*/ &le ,\ /*Name*/ "-similarity_threshold" ,\ /*Flag*/ &garbage ,\ /*TYPE*/ "F" ,\ /*OPTIONAL?*/ OPTIONAL ,\ /*MAX Nval*/ 1 ,\ /*DOC*/ "ND" ,\ /*Parameter*/ &similarity_threshold,\ /*Def 1*/ "70" ,\ /*Def 2*/ "70" ,\ /*Min_value*/ "any" ,\ /*Max Value*/ "any" \ ); /*PARAMETER PROTOTYPE: diatances to count */ get_cl_param(\ /*argc*/ argc ,\ /*argv*/ argv ,\ /*output*/ &le ,\ /*Name*/ "-filter" ,\ /*Flag*/ &garbage ,\ /*TYPE*/ "F" ,\ /*OPTIONAL?*/ OPTIONAL ,\ /*MAX Nval*/ 1 ,\ /*DOC*/ "Filter by only keeping the best quantile" ,\ /*Parameter*/ &filter,\ /*Def 1*/ "1.00" ,\ /*Def 2*/ "1.00" ,\ /*Min_value*/ "-1.00" ,\ /*Max Value*/ "1.00" \ ); /*PARAMETER PROTOTYPE: diatances to count */ get_cl_param(\ /*argc*/ argc ,\ /*argv*/ argv ,\ /*output*/ &le ,\ /*Name*/ "-filter_aln" ,\ /*Flag*/ &garbage ,\ /*TYPE*/ "D" ,\ /*OPTIONAL?*/ OPTIONAL ,\ /*MAX Nval*/ 1 ,\ /*DOC*/ "Lower Case For Residues Filtered Out" ,\ /*Parameter*/ &filter_aln,\ /*Def 1*/ "0" ,\ /*Def 2*/ "1" ,\ /*Min_value*/ "0" ,\ /*Max Value*/ "1" \ ); /*PARAMETER PROTOTYPE: diatances to count */ get_cl_param(\ /*argc*/ argc ,\ /*argv*/ argv ,\ /*output*/ &le ,\ /*Name*/ "-irmsd_graph" ,\ /*Flag*/ &garbage ,\ /*TYPE*/ "D" ,\ /*OPTIONAL?*/ OPTIONAL ,\ /*MAX Nval*/ 1 ,\ /*DOC*/ "Outputs the irmsd, position/position" ,\ /*Parameter*/ &irmsd_graph,\ /*Def 1*/ "0" ,\ /*Def 2*/ "1" ,\ /*Min_value*/ "0" ,\ /*Max Value*/ "1" \ ); /*PARAMETER PROTOTYPE: diatances to count */ get_cl_param(\ /*argc*/ argc ,\ /*argv*/ argv ,\ /*output*/ &le ,\ /*Name*/ "-nirmsd_graph" ,\ /*Flag*/ &garbage ,\ /*TYPE*/ "D" ,\ /*OPTIONAL?*/ OPTIONAL ,\ /*MAX Nval*/ 1 ,\ /*DOC*/ "Outputs the NIRMSD VS N Removed Residues Curve" ,\ /*Parameter*/ &nirmsd_graph,\ /*Def 1*/ "0" ,\ /*Def 2*/ "1" ,\ /*Min_value*/ "0" ,\ /*Max Value*/ "1" \ ); /*PARAMETER PROTOTYPE: -rmsd_threshold */ get_cl_param(\ /*argc*/ argc ,\ /*argv*/ argv ,\ /*output*/ &le ,\ /*Name*/ "-md_threshold" ,\ /*Flag*/ &garbage ,\ /*TYPE*/ "F" ,\ /*OPTIONAL?*/ OPTIONAL ,\ /*MAX Nval*/ 1 ,\ /*DOC*/ "ND" ,\ /*Parameter*/ &md_threshold ,\ /*Def 1*/ "1" ,\ /*Def 2*/ "1" ,\ /*Min_value*/ "any" ,\ /*Max Value*/ "any" \ ); /*PARAMETER PROTOTYPE: -maximum distances */ get_cl_param(\ /*argc*/ argc ,\ /*argv*/ argv ,\ /*output*/ &le ,\ /*Name*/ "-maximum_distance" ,\ /*Flag*/ &garbage ,\ /*TYPE*/ "F" ,\ /*OPTIONAL?*/ OPTIONAL ,\ /*MAX Nval*/ 1 ,\ /*DOC*/ "ND" ,\ /*Parameter*/ &maximum_distance ,\ /*Def 1*/ "10" ,\ /*Def 2*/ "10" ,\ /*Min_value*/ "any" ,\ /*Max Value*/ "any" \ ); /*PARAMETER PROTOTYPE: -print_rapdb */ get_cl_param(\ /*argc*/ argc ,\ /*argv*/ argv ,\ /*output*/ &le ,\ /*Name*/ "-print_rapdb" ,\ /*Flag*/ &garbage ,\ /*TYPE*/ "D" ,\ /*OPTIONAL?*/ OPTIONAL ,\ /*MAX Nval*/ 1 ,\ /*DOC*/ "Prints the neighborhood of each pair of aligned residues, along with the associated local score" ,\ /*Parameter*/ &print_rapdb ,\ /*Def 1*/ "0" ,\ /*Def 2*/ "1" ,\ /*Min_value*/ "any" ,\ /*Max Value*/ "any" \ ); /*PARAMETER PROTOTYPE: RUN_NAME */ declare_name (run_name); get_cl_param(\ /*argc*/ argc ,\ /*argv*/ argv ,\ /*output*/ &le ,\ /*Name*/ "-run_name" ,\ /*Flag*/ &garbage ,\ /*TYPE*/ "W_F" ,\ /*OPTIONAL?*/ OPTIONAL ,\ /*MAX Nval*/ 1 ,\ /*DOC*/ "ND" ,\ /*Parameter*/ &run_name ,\ /*Def 1*/ "default" ,\ /*Def 2*/ "" ,\ /*Min_value*/ "default" ,\ /*Max Value*/ "any" \ ); /*PARAMETER PROTOTYPE: OUTFILE */ /*PARAMETER PROTOTYPE: OUTFILE */ declare_name ( outfile); get_cl_param(\ /*argc*/ argc ,\ /*argv*/ argv ,\ /*output*/ &le ,\ /*Name*/ "-outfile" ,\ /*Flag*/ &garbage ,\ /*TYPE*/ "W_F" ,\ /*OPTIONAL?*/ OPTIONAL ,\ /*MAX Nval*/ 1 ,\ /*DOC*/ "ND" ,\ /*Parameter*/ &outfile ,\ /*Def 1*/ "no" ,\ /*Def 2*/ "default" ,\ /*Min_value*/ "default" ,\ /*Max Value*/ "any" \ ); /*PARAMETER PROTOTYPE: OUTFILE */ declare_name ( apdb_outfile); get_cl_param(\ /*argc*/ argc ,\ /*argv*/ argv ,\ /*output*/ &le ,\ /*Name*/ "-apdb_outfile" ,\ /*Flag*/ &garbage ,\ /*TYPE*/ "W_F" ,\ /*OPTIONAL?*/ OPTIONAL ,\ /*MAX Nval*/ 1 ,\ /*DOC*/ "ND" ,\ /*Parameter*/ &apdb_outfile ,\ /*Def 1*/ "stdout" ,\ /*Def 2*/ "default" ,\ /*Min_value*/ "any" ,\ /*Max Value*/ "any" \ ); /*PARAMETER PROTOTYPE: OUTPUT_FORMAT */ out_aln_format=declare_char ( 200, STRING); n_out_aln_format=get_cl_param(\ /*argc*/ argc ,\ /*argv*/ argv ,\ /*output*/ &le ,\ /*Name*/ "-output" ,\ /*Flag*/ &garbage ,\ /*TYPE*/ "S" ,\ /*OPTIONAL?*/ OPTIONAL ,\ /*MAX Nval*/ 200 ,\ /*DOC*/ "ND" ,\ /*Parameter*/ out_aln_format,\ /*Def 1*/ "score_html" ,\ /*Def 2*/ "" ,\ /*Min_value*/ "any" ,\ /*Max Value*/ "any" \ ); /*PARAMETER PROTOTYPE: INFILE */ declare_name (color_mode); get_cl_param(\ /*argc*/ argc ,\ /*argv*/ argv ,\ /*output*/ &le ,\ /*Name*/ "-color_mode" ,\ /*Flag*/ &garbage ,\ /*TYPE*/ "S" ,\ /*OPTIONAL?*/ OPTIONAL ,\ /*MAX Nval*/ 1 ,\ /*DOC*/ "ND" ,\ /*Parameter*/ &color_mode ,\ /*Def 1*/ "apdb" ,\ /*Def 2*/ "irmsd" ,\ /*Min_value*/ "any" ,\ /*Max Value*/ "any" \ ); /*PARAMETER PROTOTYPE: INFILE */ declare_name (output_res_num); get_cl_param(\ /*argc*/ argc ,\ /*argv*/ argv ,\ /*output*/ &le ,\ /*Name*/ "-seqnos" ,\ /*Flag*/ &garbage ,\ /*TYPE*/ "S" ,\ /*OPTIONAL?*/ OPTIONAL ,\ /*MAX Nval*/ 1 ,\ /*DOC*/ "ND" ,\ /*Parameter*/ &output_res_num ,\ /*Def 1*/ "off" ,\ /*Def 2*/ "on" ,\ /*Min_value*/ "any" ,\ /*Max Value*/ "any" \ ); declare_name (cache); get_cl_param(\ /*argc*/ argc ,\ /*argv*/ argv ,\ /*output*/ &le ,\ /*Name*/ "-cache" ,\ /*Flag*/ &garbage ,\ /*TYPE*/ "W_F" ,\ /*OPTIONAL?*/ OPTIONAL ,\ /*MAX Nval*/ 1 ,\ /*DOC*/ "use,ignore,update,local, directory name" ,\ /*Parameter*/ &cache ,\ /*Def 1*/ "use" ,\ /*Def 2*/ "update" ,\ /*Min_value*/ "any" ,\ /*Max Value*/ "any" \ ); declare_name (local_mode); get_cl_param( \ /*argc*/ argc ,\ /*argv*/ argv ,\ /*output*/ &le ,\ /*Name*/ "-local_mode" ,\ /*Flag*/ &garbage ,\ /*TYPE*/ "W_F" ,\ /*OPTIONAL?*/ OPTIONAL ,\ /*MAX Nval*/ 1 ,\ /*DOC*/ "Mode for choosing the Neighborhood (bubble or window)\nWhen selecting window, maximum distance becomes the window 1/2 size, in residues\nWhen using sphere, maximum_distance is the sphere radius in Angstrom" ,\ /*Parameter*/ &local_mode ,\ /*Def 1*/ "sphere" ,\ /*Def 2*/ "window" ,\ /*Min_value*/ "any" ,\ /*Max Value*/ "any" \ ); /*PARAMETER PROTOTYPE: IN */ template_file_list=declare_char (100, STRING); n_template_file=get_cl_param( \ /*argc*/ argc , \ /*argv*/ argv , \ /*output*/ &le ,\ /*Name*/ "-template_file" ,\ /*Flag*/ &garbage ,\ /*TYPE*/ "S" ,\ /*OPTIONAL?*/ OPTIONAL ,\ /*MAX Nval*/ 1000 ,\ /*DOC*/ "List of templates file for the sequences",\ /*Parameter*/ template_file_list , \ /*Def 1*/ "_SELF_P_",\ /*Def 2*/ "stdin" ,\ /*Min_value*/ "any" ,\ /*Max Value*/ "any" \ ); /*PARAMETER PROTOTYPE: MODE */ declare_name (mode); get_cl_param( \ /*argc*/ argc , \ /*argv*/ argv , \ /*output*/ &le ,\ /*Name*/ "-mode" ,\ /*Flag*/ &garbage ,\ /*TYPE*/ "S" ,\ /*OPTIONAL?*/ OPTIONAL ,\ /*MAX Nval*/ 1 ,\ /*DOC*/ "Mode: irmsd, ",\ /*Parameter*/ &mode , \ /*Def 1*/ "irmsd",\ /*Def 2*/ "stdin" ,\ /*Min_value*/ "any" ,\ /*Max Value*/ "any" \ ); get_cl_param(\ /*argc*/ argc ,\ /*argv*/ argv ,\ /*output*/ &le ,\ /*Name*/ "-prot_min_sim" ,\ /*Flag*/ &prot_min_sim ,\ /*TYPE*/ "D" ,\ /*OPTIONAL?*/ OPTIONAL ,\ /*MAX Nval*/ 1 ,\ /*DOC*/ "Minimum similarity between a sequence and its PDB target" ,\ /*Parameter*/ &prot_min_sim ,\ /*Def 1*/ "0" ,\ /*Def 2*/ "20" ,\ /*Min_value*/ "any" ,\ /*Max Value*/ "any" \ ); set_int_variable ("prot_min_sim", prot_min_sim); get_cl_param(\ /*argc*/ argc ,\ /*argv*/ argv ,\ /*output*/ &le ,\ /*Name*/ "-prot_max_sim" ,\ /*Flag*/ &prot_max_sim ,\ /*TYPE*/ "D" ,\ /*OPTIONAL?*/ OPTIONAL ,\ /*MAX Nval*/ 1 ,\ /*DOC*/ "Maximum similarity between a sequence and its BLAST relatives" ,\ /*Parameter*/ &prot_max_sim ,\ /*Def 1*/ "90" ,\ /*Def 2*/ "100" ,\ /*Min_value*/ "any" ,\ /*Max Value*/ "any" \ ); set_int_variable ("prot_max_sim", prot_max_sim); get_cl_param(\ /*argc*/ argc ,\ /*argv*/ argv ,\ /*output*/ &le ,\ /*Name*/ "-prot_min_cov" ,\ /*Flag*/ &prot_min_cov ,\ /*TYPE*/ "D" ,\ /*OPTIONAL?*/ OPTIONAL ,\ /*MAX Nval*/ 1 ,\ /*DOC*/ "Minimum coverage of a sequence by its BLAST relatives" ,\ /*Parameter*/ &prot_min_cov ,\ /*Def 1*/ "0" ,\ /*Def 2*/ "0" ,\ /*Min_value*/ "any" ,\ /*Max Value*/ "any" \ ); set_int_variable ("prot_min_cov", prot_min_cov); get_cl_param(\ /*argc*/ argc ,\ /*argv*/ argv ,\ /*output*/ &le ,\ /*Name*/ "-pdb_min_sim" ,\ /*Flag*/ &pdb_min_sim ,\ /*TYPE*/ "D" ,\ /*OPTIONAL?*/ OPTIONAL ,\ /*MAX Nval*/ 1 ,\ /*DOC*/ "Minimum similarity between a sequence and its PDB target" ,\ /*Parameter*/ &pdb_min_sim ,\ /*Def 1*/ "35" ,\ /*Def 2*/ "35" ,\ /*Min_value*/ "any" ,\ /*Max Value*/ "any" \ ); set_int_variable ("pdb_min_sim", pdb_min_sim); get_cl_param( \ /*argc*/ argc ,\ /*argv*/ argv ,\ /*output*/ &le ,\ /*Name*/ "-pdb_max_sim" ,\ /*Flag*/ &pdb_max_sim ,\ /*TYPE*/ "D" ,\ /*OPTIONAL?*/ OPTIONAL ,\ /*MAX Nval*/ 1 ,\ /*DOC*/ "Maximum similarity between a sequence and its PDB target" ,\ /*Parameter*/ &pdb_max_sim ,\ /*Def 1*/ "100" ,\ /*Def 2*/ "0" ,\ /*Min_value*/ "any" ,\ /*Max Value*/ "any" \ ); set_int_variable ("pdb_max_sim", pdb_max_sim); get_cl_param( \ /*argc*/ argc ,\ /*argv*/ argv ,\ /*output*/ &le ,\ /*Name*/ "-pdb_min_cov" ,\ /*Flag*/ &pdb_min_cov ,\ /*TYPE*/ "D" ,\ /*OPTIONAL?*/ OPTIONAL ,\ /*MAX Nval*/ 1 ,\ /*DOC*/ "Minimum coverage of a sequence by its PDB target" ,\ /*Parameter*/ &pdb_min_cov ,\ /*Def 1*/ "50" ,\ /*Def 2*/ "25" ,\ /*Min_value*/ "any" ,\ /*Max Value*/ "any" \ ); set_int_variable ("pdb_min_cov", pdb_min_cov); declare_name (pdb_blast_server); get_cl_param(\ /*argc*/ argc ,\ /*argv*/ argv ,\ /*output*/ &le ,\ /*Name*/ "-pdb_blast_server" ,\ /*Flag*/ &garbage ,\ /*TYPE*/ "W_F" ,\ /*OPTIONAL?*/ OPTIONAL ,\ /*MAX Nval*/ 1 ,\ /*DOC*/ "ND" ,\ /*Parameter*/&pdb_blast_server ,\ /*Def 1*/ "EBI" ,\ /*Def 2*/ "default" ,\ /*Min_value*/ "any" ,\ /*Max Value*/ "any" \ ); declare_name (prot_blast_server); get_cl_param(\ /*argc*/ argc ,\ /*argv*/ argv ,\ /*output*/ &le ,\ /*Name*/ "-blast" ,\ /*Flag*/ &garbage ,\ /*TYPE*/ "W_F" ,\ /*OPTIONAL?*/ OPTIONAL ,\ /*MAX Nval*/ 1 ,\ /*DOC*/ "ND" ,\ /*Parameter*/&prot_blast_server ,\ /*Def 1*/ "" ,\ /*Def 2*/ "" ,\ /*Min_value*/ "any" ,\ /*Max Value*/ "any" \ ); //make sure that -blast and -blast_server are both supported blast>blast_server if ( !prot_blast_server[0]) { get_cl_param( \ /*argc*/ argc ,\ /*argv*/ argv ,\ /*output*/ &le ,\ /*Name*/ "-blast_server" ,\ /*Flag*/ &garbage ,\ /*TYPE*/ "W_F" ,\ /*OPTIONAL?*/ OPTIONAL ,\ /*MAX Nval*/ 1 ,\ /*DOC*/ "ND" ,\ /*Parameter*/&prot_blast_server ,\ /*Def 1*/ "EBI" ,\ /*Def 2*/ "default" ,\ /*Min_value*/ "any" ,\ /*Max Value*/ "any" \ ); } set_string_variable ("blast_server", prot_blast_server); declare_name (pdb_db); get_cl_param(\ /*argc*/ argc ,\ /*argv*/ argv ,\ /*output*/ &le ,\ /*Name*/ "-pdb_db" ,\ /*Flag*/ &garbage ,\ /*TYPE*/ "W_F" ,\ /*OPTIONAL?*/ OPTIONAL ,\ /*MAX Nval*/ 1 ,\ /*DOC*/ "Non Redundant PDB database" ,\ /*Parameter*/&pdb_db ,\ /*Def 1*/ "pdb" ,\ /*Def 2*/ "default" ,\ /*Min_value*/ "any" ,\ /*Max Value*/ "any" \ ); set_string_variable ("pdb_db", pdb_db); declare_name (prot_db); get_cl_param(\ /*argc*/ argc ,\ /*argv*/ argv ,\ /*output*/ &le ,\ /*Name*/ "-protein_db" ,\ /*Flag*/ &garbage ,\ /*TYPE*/ "W_F" ,\ /*OPTIONAL?*/ OPTIONAL ,\ /*MAX Nval*/ 1 ,\ /*DOC*/ "ND" ,\ /*Parameter*/&prot_db ,\ /*Def 1*/ "uniprot" ,\ /*Def 2*/ "default" ,\ /*Min_value*/ "any" ,\ /*Max Value*/ "any" \ ); get_cl_param( \ /*argc*/ argc ,\ /*argv*/ argv ,\ /*output*/ &le ,\ /*Name*/ "-gapped" ,\ /*Flag*/ &garbage ,\ /*TYPE*/ "D" ,\ /*OPTIONAL?*/ OPTIONAL ,\ /*MAX Nval*/ 1 ,\ /*DOC*/ "ND" ,\ /*Parameter*/&gapped ,\ /*Def 1*/ "0" ,\ /*Def 2*/ "1" ,\ /*Min_value*/ "any" ,\ /*Max Value*/ "any" \ ); get_cl_param( \ /*argc*/ argc ,\ /*argv*/ argv ,\ /*output*/ &le ,\ /*Name*/ "-min_ncol" ,\ /*Flag*/ &garbage ,\ /*TYPE*/ "D" ,\ /*OPTIONAL?*/ OPTIONAL ,\ /*MAX Nval*/ 1 ,\ /*DOC*/ "minimum number of columns (negative: fraction)" ,\ /*Parameter*/&min_ncol ,\ /*Def 1*/ "4" ,\ /*Def 2*/ "1" ,\ /*Min_value*/ "any" ,\ /*Max Value*/ "any" \ ); // set the correct mode: if ( strm (argv[0], "trmsd"))sprintf (mode, "trmsd"); set_string_variable ("prot_db", prot_db); if (argc==1){myexit (EXIT_SUCCESS);} if ( strm (outfile,"no"))n_out_aln_format=0; get_cl_param( argc, argv,&le, NULL,NULL,NULL,0,0,NULL); prepare_cache (cache); if (strm ( aln, "")) sprintf ( aln, "%s", argv[1]); if (!is_aln (aln)) { printf_exit (EXIT_FAILURE, stderr, "\n\n---- ERROR: File %s must be a valid alignment [FATAL:%s-%s]\n\n",aln,argv[0], PROGRAM); } pdb_param=vcalloc ( 1, sizeof(Pdb_param)); pdb_param->similarity_threshold=similarity_threshold; pdb_param->md_threshold=md_threshold; pdb_param->maximum_distance=maximum_distance; if ( n_excluded_nb>0) pdb_param->n_excluded_nb=n_excluded_nb; else if ( n_excluded_nb==-1) pdb_param->n_excluded_nb=(int)((float)maximum_distance/(float)1.57); /* Exclude all the nb within the bubble at +1, +2, +n*/ pdb_param->print_rapdb=print_rapdb; pdb_param->comparison_io=comparison_io; pdb_param->local_mode=local_mode; pdb_param->color_mode=lower_string (color_mode); pdb_param->filter=filter; pdb_param->filter_aln=filter_aln; pdb_param->irmsd_graph=irmsd_graph; pdb_param->nirmsd_graph=nirmsd_graph; sprintf ( list_file[n_list++], "S%s", aln); if (!strm (repeat_seq, "")) { sprintf ( template_file_list[0], "%s", process_repeat (list_file[0], repeat_seq, repeat_pdb)); fprintf ( le, "\n##Turn a repeat List into a Template File\n"); le=display_file_content (le,template_file_list[0]); fprintf ( le, "\n\n"); } S=read_seq_in_n_list (list_file, n_list, NULL, NULL); le=display_sequences_names ( S,le,0, 0); if ( n_template_file) { fprintf ( le, "\nLooking For Sequence Templates:\n"); for ( a=0; a< n_template_file; a++) { fprintf ( le, "\n\tTemplate Type: [%s] Mode Or File: [%s] [Start", template_type2type_name(template_file_list[a]), template_file_list[a]); S=seq2template_seq(S, template_file_list[a], F); fprintf ( le, "]"); } } if ( !strm (run_name, "default")) { F=parse_fname(run_name); sprintf (F->name, "%s", F->full); } else { F=parse_fname (aln); } for ( a=0; a< S->nseq; a++) { char *p; p=seq2T_value (S, a, "template_file", "_P_"); if (p)sprintf (S->file[a], "%s",p); } CL=declare_constraint_list ( S,NULL, NULL, 0,NULL, NULL); CL->T=vcalloc (S->nseq,sizeof (Ca_trace*)); for ( n_pdb=0,a=0; anseq; a++) { if ( !is_pdb_file ( S->file[a])){CL->T[a]=NULL;continue;} CL->T[a]=read_ca_trace (S->file[a], "ATOM"); CL->T[a]=trim_ca_trace (CL->T[a], S->seq[a]); (CL->T[a])->pdb_param=pdb_param; n_pdb++; } A=declare_aln (S); A->residue_case=KEEP_CASE; A=main_read_aln(aln, A); EA=copy_aln (A, EA); A->CL=CL; if ( strm (apdb_outfile, "default")) sprintf ( apdb_outfile, "%s.apdb_result", F->name); if ( n_pdb<2) { FILE *fp; fp=vfopen (apdb_outfile, "w"); fprintf (fp, "\nYour Alignment Does Not Contain Enough Sequences With a known Structure\n"); fprintf (fp, "To Use APDB, your alignment must include at least TWO sequences with a known structure.\n"); fprintf (fp, "These sequences must be named according to their PDB identifier, followed by the chain index (if any) ex: 1fnkA\n"); fprintf (fp, "[FATAL:%s]\n", PROGRAM); vfclose (fp); } else if ( strm (mode, "irmsd")) { EA=analyse_pdb ( A, EA, apdb_outfile); } else if ( strm (mode, "msa2tree") || strm (mode, "trmsd")) { EA=msa2struc_dist ( A, EA,F->name, gapped, min_ncol); } le=display_output_filename ( le, "APDB_RESULT", "APDB_RESULT_FORMAT_01", apdb_outfile, CHECK); if ( n_pdb>=2) { declare_name (file_name); for ( a=0; a< n_out_aln_format; a++) { if ( strm2( outfile, "stdout", "stderr"))sprintf (file_name, "%s", outfile); else if ( strm (outfile, "default")) sprintf (file_name, "%s.%s",F->name, out_aln_format[a]); else sprintf (file_name, "%s.%s",outfile,out_aln_format[a]); output_format_aln (out_aln_format[a],A,EA,file_name); le=display_output_filename ( le, "MSA", out_aln_format[a], file_name, CHECK); } } return EXIT_SUCCESS; } Constraint_list * set_constraint_list4align_pdb (Constraint_list *CL,int seq, char *dp_mode, char *local_mode, char *param_file) { static Constraint_list *PWCL; static Pdb_param *pdb_param; char **x; int n; if ( !CL) { free_constraint_list (PWCL); return NULL; } else if ( !PWCL) { PWCL=declare_constraint_list ( CL->S,NULL, NULL, 0,NULL, NULL); pdb_param=vcalloc ( 1, sizeof(Pdb_param)); pdb_param->N_ca=0; pdb_param->max_delta=2.0; pdb_param->maximum_distance=14; declare_name (pdb_param->local_mode); sprintf (pdb_param->local_mode, "%s", local_mode); pdb_param->scale=50; PWCL->pw_parameters_set=1; PWCL->S=CL->S; PWCL->lalign_n_top=10; PWCL->sw_min_dist=10; PWCL->T=vcalloc ( (PWCL->S)->nseq, sizeof (Ca_trace*)); PWCL->extend_jit=0; PWCL->maximise=1; /*PWCL->gop=-40;*/ PWCL->gop=-50; PWCL->gep=-20; sprintf (CL->matrix_for_aa_group, "vasiliky"); PWCL->use_fragments=0; PWCL->ktup=0; PWCL->TG_MODE=1; } if ( param_file && check_file_exists ( param_file) ) { if ( (x=get_parameter ( "-nca", &n, param_file))!=NULL){pdb_param->N_ca=atoi(x[0]);free_char (x, -1);} if ( (x=get_parameter ( "-max_delta", &n, param_file))!=NULL){pdb_param->max_delta=atof(x[0]);free_char (x, -1);} if ( (x=get_parameter ( "-maximum_distance", &n, param_file))!=NULL){pdb_param->maximum_distance=atoi(x[0]);free_char (x, -1);} if ( (x=get_parameter ( "-local_mode", &n, param_file))!=NULL){sprintf (pdb_param->local_mode, "%s",x[0]);free_char (x, -1);} if ( (x=get_parameter ( "-scale", &n, param_file))!=NULL){pdb_param->scale=atoi(x[0]);free_char (x, -1);} if ( (x=get_parameter ( "-gapopen", &n, param_file))!=NULL){PWCL->gop=atoi(x[0]);free_char (x, -1);} if ( (x=get_parameter ( "-gapext" , &n, param_file))!=NULL){PWCL->gep=atof(x[0]);free_char (x, -1);} } sprintf ( PWCL->dp_mode, "%s", dp_mode); if (strm (PWCL->dp_mode, "lalign"))sprintf (PWCL->dp_mode,"sim_pair_wise_lalign"); else if (strm (PWCL->dp_mode, "sw"))sprintf (PWCL->dp_mode,"gotoh_pair_wise_sw"); local_mode=pdb_param->local_mode; if ( strm ( local_mode, "hasch_ca_trace_nb")) PWCL->evaluate_residue_pair=evaluate_ca_trace_nb; else if ( strm ( local_mode, "hasch_ca_trace_bubble")) PWCL->evaluate_residue_pair=evaluate_ca_trace_bubble; else if ( strm ( local_mode, "hasch_ca_trace_sap1_bubble")) PWCL->evaluate_residue_pair=evaluate_ca_trace_sap1_bubble; else if ( strm ( local_mode, "hasch_ca_trace_sap2_bubble")) PWCL->evaluate_residue_pair=evaluate_ca_trace_sap2_bubble; else if ( strm ( local_mode, "hasch_ca_trace_transversal")) PWCL->evaluate_residue_pair=evaluate_ca_trace_transversal; else if ( strm ( local_mode, "hasch_ca_trace_bubble_2")) PWCL->evaluate_residue_pair=evaluate_ca_trace_bubble_2; else if ( strm ( local_mode, "hasch_ca_trace_bubble_3")) PWCL->evaluate_residue_pair=evaluate_ca_trace_bubble_3; else if ( strm ( local_mode, "custom_pair_score_function1")) PWCL->evaluate_residue_pair=custom_pair_score_function1; else if ( strm ( local_mode, "custom_pair_score_function2")) PWCL->evaluate_residue_pair=custom_pair_score_function2; else if ( strm ( local_mode, "custom_pair_score_function3")) PWCL->evaluate_residue_pair=custom_pair_score_function3; else if ( strm ( local_mode, "custom_pair_score_function4")) PWCL->evaluate_residue_pair=custom_pair_score_function4; else if ( strm ( local_mode, "custom_pair_score_function5")) PWCL->evaluate_residue_pair=custom_pair_score_function5; else if ( strm ( local_mode, "custom_pair_score_function6")) PWCL->evaluate_residue_pair=custom_pair_score_function6; else if ( strm ( local_mode, "custom_pair_score_function7")) PWCL->evaluate_residue_pair=custom_pair_score_function7; else if ( strm ( local_mode, "custom_pair_score_function8")) PWCL->evaluate_residue_pair=custom_pair_score_function8; else if ( strm ( local_mode, "custom_pair_score_function9")) PWCL->evaluate_residue_pair=custom_pair_score_function9; else if ( strm ( local_mode, "custom_pair_score_function10")) PWCL->evaluate_residue_pair=custom_pair_score_function10; else { fprintf ( stderr, "\n%s is an unknown hasch mode, [FATAL]\n", local_mode); myexit (EXIT_FAILURE); } if ( PWCL->T[seq]); else { PWCL->T[seq]=read_ca_trace (is_pdb_struc((CL->S)->name[seq]), "ATOM"); (PWCL->T[seq])->pdb_param=pdb_param; PWCL->T[seq]=trim_ca_trace (PWCL->T[seq], (CL->S)->seq[seq]); PWCL->T[seq]=hasch_ca_trace(PWCL->T[seq]); } return PWCL; } int evaluate_ca_trace_nb (Constraint_list *CL, int s1, int r1, int s2, int r2) { return (int)(neighborhood_match(CL, s1,r1, s2, r2, (CL->T[s1])->Chain,(CL->T[s2])->Chain )); } int evaluate_ca_trace_sap2_bubble (Constraint_list *CL, int s1, int r1, int s2, int r2) { return sap2_neighborhood_match (CL, s1, r1, s2, r2, (CL->T[s1])->Bubble,(CL->T[s2])->Bubble ); } int evaluate_ca_trace_sap1_bubble (Constraint_list *CL, int s1, int r1, int s2, int r2) { /* Function documentation: start int evaluate_ca_trace_sap1_bubble (Constraint_list *CL, int s1, int s2, int r1, int r2) This function evaluates the cost for matching two residues: a1 is the cost for matching the two neighborood ( bubble type), using sap a1: [0,+100], +100 is the best possible match. a2 is the residue type weight: min=worst substitution value best=best of r1/r1, r2/r2-min a2=(r1/r2 -min)/best --> a1:[0, 100] score=a1*a2-->[-inf, +10000]; */ float a1; a1=(int) sap1_neighborhood_match (CL, s1, r1, s2, r2, (CL->T[s1])->Bubble,(CL->T[s2])->Bubble ); return (int)a1; } int evaluate_ca_trace_bubble (Constraint_list *CL, int s1, int r1, int s2, int r2) { /* Function documentation: start int evaluate_ca_trace_bubble (Constraint_list *CL, int s1, int s2, int r1, int r2) This function evaluates the cost for matching two residues: a1 is the cost for matching the two neighborood ( bubble type) a1: [-inf,+100-scale], +100-scale is the best possible match. */ float a1; a1=(int) neighborhood_match (CL, s1, r1, s2, r2, (CL->T[s1])->Bubble,(CL->T[s2])->Bubble )-((CL->T[s1])->pdb_param)->scale; return a1; } int evaluate_ca_trace_transversal (Constraint_list *CL, int s1, int r1, int s2, int r2) { return (int)(transversal_match (CL, s1, r1, s2, r2, (CL->T[s1])->Transversal,(CL->T[s2])->Transversal )); } int evaluate_ca_trace_bubble_3 (Constraint_list *CL, int s1, int r1, int s2, int r2) { /*This Mode evaluates : 1-The Bubble 2-The Match of the transversal residues */ int a1, l1; int a2, l2; int a; l1=MAX(( (CL->T[s1])->Chain )->nb[r1][0] ,((CL->T[s2])->Chain )->nb[r2][0]); l2=MAX(( (CL->T[s1])->Bubble)->nb[r1][0], ((CL->T[s2])->Bubble)->nb[r2][0]); a1=(int)(neighborhood_match (CL, s1, r1, s2, r2, (CL->T[s1])->Bubble,(CL->T[s2])->Bubble )); a2=(int)(transversal_match (CL, s1, r1, s2, r2, (CL->T[s1])->Transversal,(CL->T[s2])->Transversal )); if ( !l1 && !l2)return 0; a=(a1+a2)/2; return a; } int evaluate_ca_trace_bubble_2 (Constraint_list *CL, int s1, int r1, int s2, int r2) { /*This Mode evaluates : 1-The Ca neighborhood 2-The Bubble */ return (int)((neighborhood_match (CL, s1, r1, s2, r2, (CL->T[s1])->Chain,(CL->T[s2])->Chain ))); } /*********************************************************************************************/ /* */ /* FUNCTIONS FOR COMPARING TWO NEIGHBORHOODS:START */ /* */ /*********************************************************************************************/ float matrix_match (Constraint_list *CL, int s1, int r1, int s2, int r2, Struct_nb *nbs1, Struct_nb *nbs2) { /* Function documentation: start float matrix_match (Constraint_list *CL, int s1, int s2, int r1, int r2, Struct_nb *nbs1, Struct_nb *nbs2) This function evaluates the matrix for matching two residues: min=worst substitution value best=best of r1/r1, r2/r2-min a2=(r1/r2 -min)/best --> a1:[0, 100] score=a1*a2-->[-inf, +10000]; */ float a2; float m1, m2, m; static float min=0; int a, b; if ( !CL->M) { CL->M=read_matrice ( "pam250mt"); min=CL->M[0][0]; for ( a=0; a< 26; a++) for ( b=0; b< 26; b++)min=MIN(min, CL->M[a][b]); } if ( r1<=0 || r2<=0)return 0; m1=CL->M[(CL->S)->seq[s1][r1-1]-'A'][(CL->S)->seq[s1][r1-1]-'A']-min; m2=CL->M[(CL->S)->seq[s2][r2-1]-'A'][(CL->S)->seq[s2][r2-1]-'A']-min; m=MAX(m1, m2); a2=(CL->M[(CL->S)->seq[s1][r1-1]-'A'][(CL->S)->seq[s2][r2-1]-'A']-min)/m; return a2; } float transversal_match (Constraint_list *CL, int s1, int r1, int s2, int r2, Struct_nb *nbs1, Struct_nb *nbs2) { int a, l1, l2; float score=0; float delta, max_delta; float max; Pdb_param*PP; PP=(CL->T[s1])->pdb_param; max_delta=PP->max_delta; l1=nbs1->nb[r1][0]; l2=nbs2->nb[r2][0]; if ( l1!=l2 || l1<(PP->N_ca)) return 0; max=MAX(l1, l2)*max_delta; for ( delta=0,a=0; a< l2 ; a++) { delta+=max_delta-FABS((nbs1->d_nb[r1][a]-nbs2->d_nb[r2][a])); } score=(delta*100)/max; return score; } float neighborhood_match (Constraint_list *CL, int s1, int r1, int s2, int r2, Struct_nb *nbs1, Struct_nb *nbs2) { static float **table; static int table_size; int a, b, l1, l2; float score=0; float ins, del, sub; float delta, max_delta; float max; Pdb_param*PP; PP=(CL->T[s1])->pdb_param; max_delta=PP->max_delta; if ( r1> 0 && r2 >0) {r1--; r2--;} else return 0; l1=nbs1->nb[r1][0]; l2=nbs2->nb[r2][0]; if (table_size< (MAX(l1, l2)+1)) { table_size=MAX(l1, l2)+1; if ( table)free_float (table, -1); table=NULL; } if ( !table) table=declare_float (table_size, table_size); max=MAX(l1, l2)*max_delta; if ( max==0)return 0; table[0][0]=0; for ( b=1; b<=l2; b++) { table[0][b]=0; } for ( a=1; a<=l1; a++) { table[a][0]=0; for ( b=1; b<=l2 ; b++) { delta=max_delta-FABS((nbs1->d_nb[r1][a]-nbs2->d_nb[r2][b])); del=table[a-1][b]; ins=table[a][b-1]; sub= table[a-1][b-1]+delta; if ( del >= ins && del >= sub)score=del; else if ( ins >= del && ins >= sub) score=ins; else score=sub; table[a][b]=score; } } score=((((score)*100)/max)); return score; } float sap1_neighborhood_match (Constraint_list *CL, int s1, int r1, int s2, int r2, Struct_nb *nbs1, Struct_nb *nbs2) { /* Function documentation: start float sap1_neighborhood_match (Constraint_list *CL, int s1, int s2, int r1, int r2, Struct_nb *nbs1, Struct_nb *nbs2) This function is adapted from Taylor, Orengo, Protein Structure Alignment JMB 1989, (208)1-22 It is the first function where score= A/(|dra-drb|+b) Function documentation: end */ static float **table; static int table_size; int a, b, l1, l2; float score=0; float ins, del, sub; float delta; float max; int A=50; int B=5; if ( r1> 0 && r2 >0) {r1--; r2--;} else return 0; l1=nbs1->nb[r1][0]; l2=nbs2->nb[r2][0]; if (table_size< (MAX(l1, l2)+1)) { table_size=MAX(l1, l2)+1; if ( table)free_float (table, -1); table=NULL; } if ( !table) table=declare_float (table_size, table_size); max=MAX(l1, l2)*(A/B); if ( max==0)return 0; table[0][0]=0; for ( b=1; b<=l2; b++) { table[0][b]=0; } for ( a=1; a<=l1; a++) { table[a][0]=0; for ( b=1; b<=l2 ; b++) { delta=A/(FABS((nbs1->d_nb[r1][a]-nbs2->d_nb[r2][b]))+B); del=table[a-1][b]; ins=table[a][b-1]; sub= table[a-1][b-1]+delta; if ( del >= ins && del >= sub)score=del; else if ( ins >= del && ins >= sub) score=ins; else score=sub; table[a][b]=score; } } score=((score*100))/(max); return score; } float sap2_neighborhood_match (Constraint_list *CL, int s1, int r1, int s2, int r2, Struct_nb *nbs1, Struct_nb *nbs2) { /* Function documentation: start float sap1_neighborhood_match (Constraint_list *CL, int s1, int s2, int r1, int r2, Struct_nb *nbs1, Struct_nb *nbs2) This function is adapted from Taylor, Orengo, Protein Structure Alignment JMB 1989, (208)1-22 It is the first function where score= A/(|dra-drb|+b) Function documentation: end */ static float **table; static int table_size; int a, b, l1, l2; float score=0; float ins, del, sub; float delta; float max; Amino_acid **pep1; Amino_acid **pep2; static Atom *vX_1, *vY_1, *vZ_1; static Atom *vX_2, *vY_2, *vZ_2; static Atom *ca1, *ca2; float val; int A=50; int B=2; if ( r1> 0 && r2 >0) {r1--; r2--;} else return 0; /*Make up the referencial*/ pep1=(CL->T[s1])->peptide_chain; pep2=(CL->T[s2])->peptide_chain; /*Get Referencial for CA1*/ if ( (pep1[r1])->C)vX_1 =diff_atom(pep1[r1]->C,pep1[r1]->CA, vX_1); if ( (pep1[r1])->N)vY_1 =diff_atom(pep1[r1]->N,pep1[r1]->CA, vY_1); if ( (pep1[r1])->CB)vZ_1=diff_atom(pep1[r1]->CB,(pep1[r1])->CA,vZ_1); else vZ_1=add_atom (vX_1, vY_1, vZ_1); /*Get Referencial for CA2*/ if ( (pep2[r2])->C)vX_2 =diff_atom((pep2[r2])->C,(pep2[r2])->CA, vX_2); if ( (pep2[r2])->N)vY_2 =diff_atom((pep2[r2])->N,(pep2[r2])->CA, vY_2); if ( (pep2[r2])->CB)vZ_2=diff_atom((pep2[r2])->CB,(pep2[r2])->CA, vZ_2); else vZ_2=add_atom (vX_2, vY_2, vZ_2); /*END OF GETTING REFERENCIAL*/ /*Test if ( r1>1 && r2>1) { fprintf (stdout,"\n\t*******"); fprintf (stdout, "RESIDUE %d %c", r1, (CL->S)->seq[s1][r1]); if ( (pep1[r1])->CA)fprintf (stdout,"\n\tCA ");print_atom (pep1[r1]->CA ); if ( (pep1[r1])->C)fprintf (stdout,"\n\tC ");print_atom (pep1[r1]->C ); if ( (pep1[r1])->N)fprintf (stdout,"\n\tN ");print_atom (pep1[r1]->N ); if ( (pep1[r1])->CB)fprintf (stdout,"\n\tCB ");print_atom (pep1[r1]->CB ); fprintf (stdout,"\n\t*******"); fprintf (stdout,"\n\tvX ");print_atom ( vX_1); fprintf (stdout,"\n\tvY ");print_atom ( vY_1); fprintf (stdout,"\n\tvZ ");print_atom ( vZ_1); ca1= copy_atom ((pep1[r1-1])->CA, ca1); ca1 =diff_atom(ca1,(pep1[r1])->CA, ca1); fprintf (stdout,"\n\tca ");print_atom ( ca1); fprintf ( stdout, "\n\tSQ1=%d ", (int)square_atom(ca1)); ca1=reframe_atom(vX_1, vY_1, vZ_1, ca1, ca1); fprintf ( stdout, "\n\tSQ2=%d ", (int)square_atom(ca1)); fprintf (stdout,"\n\tca ");print_atom ( ca1); fprintf (stdout,"\n\n"); } */ l1=nbs1->nb[r1][0]; l2=nbs2->nb[r2][0]; if (table_size< (MAX(l1, l2)+1)) { table_size=MAX(l1, l2)+1; if ( table)free_float (table, -1); table=NULL; } if ( !table) table=declare_float (table_size, table_size); max=MAX(l1, l2)*(A/B); if ( max==0)return 0; table[0][0]=0; for ( b=1; b<=l2; b++) { table[0][b]=0; } for ( a=1; a<=l1; a++) { ca1=copy_atom ((CL->T[s1])->structure[nbs1->nb[r1][a]], ca1); ca1=diff_atom(ca1,(pep1[r1])->CA, ca1); ca1=reframe_atom(vX_1, vY_1, vZ_1, ca1, ca1); table[a][0]=0; for ( b=1; b<=l2 ; b++) { ca2 =copy_atom((CL->T[s2])->structure[nbs2->nb[r2][b]], ca2); ca2 =diff_atom(ca2,(pep2[r2])->CA, ca2); ca2 =reframe_atom(vX_2, vY_2, vZ_2, ca2, ca2); ca2=diff_atom(ca2,ca1,ca2); val=square_atom (ca2); val=(float)sqrt ((double)val); delta=A/(val+B); del=table[a-1][b]; ins=table[a][b-1]; sub= table[a-1][b-1]+delta; if ( del >= ins && del >= sub)score=del; else if ( ins >= del && ins >= sub) score=ins; else score=sub; table[a][b]=score; } } score=(((score*100))/(max)-50); return score; } /*********************************************************************************************/ /* */ /* APDB */ /* */ /*********************************************************************************************/ float **** irmsdmin_apdb_filtration ( Alignment *A, float ****residues, Constraint_list *CL, Pdb_param *PP, FILE *fp) { int s1, s2, a,col1, n,n2=0, t,flag; int **pos, **list; float nirmsd, min_nirmsd,max_nirmsd,ref_sum, sum, sum2; float **normalized_len; normalized_len=declare_float (A->nseq+1, A->nseq+1); for (s1=0; s1nseq; s1++) { int l1, l2, r1, r2, p; for (s2=0; s2nseq; s2++) { for ( l1=l2=p=0; p< A->len_aln; p++) { r1=A->seq_al[s1][p]; r2=A->seq_al[s2][p]; if (!is_gap(r1) && isupper(r1))l1++; if (!is_gap(r2) && isupper(r2))l2++; } normalized_len[s1][s2]=MIN(l1,l2); } } pos=aln2pos_simple (A, A->nseq); for ( s1=0; s1< A->nseq; s1++) for ( s2=0; s2nseq; s2++) { if ( s1==s2) continue; else if (!(CL->T[A->order[s1][0]]) || !(CL->T[A->order[s2][0]]))continue; list=declare_int (A->len_aln, 2); for ( sum=0,n=0,col1=0; col1< A->len_aln; col1++) { if ( islower (A->seq_al[s1][col1]) || islower ( A->seq_al[s2][col1]))continue; else if ( pos[s1][col1]<=0 || pos[s2][col1]<=0 ) continue; else if ( residues[s1][s2][pos[s1][col1]-1][0]==0)continue; list[n][0]=pos[s1][col1]-1; list[n][1]=(int)100000*residues[s1][s2][pos[s1][col1]-1][4]; sum2+=residues[s1][s2][pos[s1][col1]-1][4]; n++; } if (n==0)return residues; sort_int_inv (list, 2, 1,0, n-1); for (sum=0,a=0; a=max_nirmsd) && flag==1)break; n2=a; } sum=ref_sum; for (a=0; anirmsd_graph) { fprintf ( stdout, "\n_NIRMSD_GRAPH %s %s POS: %4d Removed: %4d NiRMSD: %.2f", A->name[s1], A->name[s2], list[a][0],a,(nirmsd/100000)*normalized_len[s1][s2]); } } } if ( PP->print_rapdb) { for ( a=0; a0 && a<=t)fprintf ( stdout, "\nRAPDB QUANTILE REMOVE S1: %3d S2: %3d COL: %3d SCORE*100: %d", s1, s2, list[a][0], list[a][1]); else if ( list[a][1]>0 && a>t)fprintf ( stdout, "\nRAPDB QUANTILE KEEP S1: %3d S2: %3d COL: %3d SCORE*100: %d", s1, s2, list[a][0], list[a][1]); } } fprintf ( stdout, "\n# MINIMISATION FILTER ON: NiRMSD minimsation resulted in the removal of %d [out of %d] Columns On the alignment %s Vs %s\n", t, n, A->name[s1], A->name[s2]); for ( a=0; a<=t; a++) { residues[s1][s2][list[a][0]][0]=0; residues[s1][s2][list[a][0]][1]=0; residues[s1][s2][list[a][0]][2]=0; residues[s1][s2][list[a][0]][3]=0; residues[s1][s2][list[a][0]][4]=-1; } free_int (list, -1); } free_float (normalized_len, -1); return residues; } float **** quantile_apdb_filtration ( Alignment *A, float ****residues, Constraint_list *CL, Pdb_param *PP,FILE *fp) { int s1, s2, a,col1, n, t; int **pos, **list; pos=aln2pos_simple (A, A->nseq); for ( s1=0; s1< A->nseq; s1++) for ( s2=0; s2nseq; s2++) { if ( s1==s2) continue; else if (!(CL->T[A->order[s1][0]]) || !(CL->T[A->order[s2][0]]))continue; list=declare_int (A->len_aln, 2); for ( n=0,col1=0; col1< A->len_aln; col1++) { if ( islower (A->seq_al[s1][col1]) || islower ( A->seq_al[s2][col1]))continue; else if ( pos[s1][col1]<=0 || pos[s2][col1]<=0 ) continue; list[n][0]=pos[s1][col1]-1; list[n][1]=(int)100*residues[s1][s2][pos[s1][col1]-1][4]; n++; } sort_int_inv (list, 2, 1,0, n-1); t=quantile_rank ( list,1, n,PP->filter); if ( PP->print_rapdb) { for ( a=0; a0 && a0 && a>t)fprintf ( stdout, "\nRAPDB QUANTILE KEEP S1: %3d S2: %3d COL: %3d SCORE*100: %d", s1, s2, list[a][0], list[a][1]); } } for ( a=0; aCL; for ( s1=0; s1< (A->S)->nseq; s1++) if ( CL->T[s1]){PP=(CL->T[s1])->pdb_param;break;} if (PP->irmsd_graph)irmsd_graph =vfopen ((irmsd_file =vtmpnam (NULL)),"w"); fprintf ( fp, "\nAPDB_RESULT_FORMAT_02\n"); residues=analyse_pdb_residues ( A, A->CL,PP); if ( PP->filter>=0)residues=quantile_apdb_filtration (A, residues, A->CL,PP, fp); else if ( PP->filter<0)residues=irmsdmin_apdb_filtration (A, residues, A->CL,PP, fp); pos=aln2pos_simple (A, A->nseq); /*Compute the alignment length for normalization*/ normalize_len=declare_float (A->nseq+1, A->nseq+1); for (s1=0; s1nseq; s1++) { int l1, l2, r1, r2; for (s2=0; s2nseq; s2++) { for ( l1=l2=p=0; p< A->len_aln; p++) { r1=A->seq_al[s1][p]; r2=A->seq_al[s2][p]; if (!is_gap(r1) && isupper(r1))l1++; if (!is_gap(r2) && isupper(r2))l2++; } normalize_len[s1][s2]=MIN(l1,l2); } } msa_len=msa_tot=msa_m1=msa_m2=msa_m3=msa_m4=msa_m5=0; for ( s1=0; s1< A->nseq; s1++) { if ( !(CL->T[A->order[s1][0]]))continue; seq_len=seq_tot=seq_m1=seq_m2=seq_m3=seq_m4=seq_m5=0; for ( s2=0; s2< A->nseq; s2++) { if ( s1==s2)continue; if ( !(CL->T[A->order[s2][0]]))continue; pair_tot=pair_m1=pair_m2=pair_m3=pair_m4=pair_m5=0; for ( p=0; p< A->len_aln; p++) { r1=A->seq_al[s1][p]; r2=A->seq_al[s2][p]; b=pos[s1][p]-1; if (PP->filter_aln) { if (is_gap(r1) || is_gap(r2) || residues[s1][s2][b][0]==0) { A->seq_al[s1][p]=tolower(r1); A->seq_al[s2][p]=tolower(r2); } else { A->seq_al[s1][p]=toupper(r1); A->seq_al[s2][p]=toupper(r2); } } if ( PP->irmsd_graph && ( is_gap(r1) || is_gap(r2) || residues[s1][s2][b][0]==0)) { fprintf ( irmsd_graph, "\n_IRMSD_GRAPH %10s %10s ALN: %c%c iRMSD: -1.00", A->name[s1], A->name[s2],A->seq_al[s1][p], A->seq_al[s2][p]); } if (is_gap(r1) || is_gap(r2) || residues[s1][s2][b][0]==0)continue; pair_tot++; /*APDB*/ m2=(residues[s1][s2][b][2]*100)/residues[s1][s2][b][0]; if (m2>PP->similarity_threshold){pair_m3++;} /*iRMSD*/ m4=residues[s1][s2][b][4]; if ( PP->irmsd_graph ) { fprintf ( irmsd_graph, "\nIRMSD_GRAPH %10s %10s ALN: %c%c iRMSD: %.2f", A->name[s1], A->name[s2],A->seq_al[s1][p], A->seq_al[s2][p], m4); } pair_m4+=m4; } pair_len=normalize_len[s1][s2]; if ( s1>s2) { fprintf ( pairwise, "\n\n#PAIRWISE: %s Vs %s",A->name[s1], A->name[s2]); fprintf ( pairwise, "\n\tPAIRWISE EVALUATED: %6.2f %% [%s Vs %s] ", (pair_len==0)?-1:(pair_tot*100)/pair_len,A->name[s1], A->name[s2]); fprintf ( pairwise, "\n\tPAIRWISE APDB: %6.2f %% [%s Vs %s] ", (pair_tot==0)?-1:(pair_m3*100)/pair_tot,A->name[s1], A->name[s2]); fprintf ( pairwise, "\n\tPAIRWISE iRMSD: %6.2f Angs [%s Vs %s]", (pair_tot==0)?-1:pair_m4/pair_tot,A->name[s1], A->name[s2]); fprintf ( pairwise, "\n\tPAIRWISE NiRMSD: %6.2f Angs [%s Vs %s] [%d pos]", (pair_tot==0)?-1:(pair_m4*pair_len)/(pair_tot*pair_tot), A->name[s1], A->name[s2], (int)pair_tot); fprintf ( pairwise, "\n\tRAPDB PAIRS PAIRWISE N_NONEMPTY_PAIRS %d N_MAXIMUM_PAIRS %d",(int) pair_tot, (int)pair_len); msa_m3+=pair_m3; msa_m4+=pair_m4; msa_tot+=pair_tot; msa_len+=pair_len; } seq_m3+=pair_m3; seq_m4+=pair_m4; seq_tot+=pair_tot; seq_len+=pair_len; } fprintf ( average, "\n\n#AVERAGE For Sequence %s", A->name[s1]); fprintf ( average, "\n\tAVERAGE EVALUATED: %6.2f %% [%s]", (seq_len==0)?-1:(seq_tot*100)/seq_len, A->name[s1]); fprintf ( average, "\n\tAVERAGE APDB: %6.2f %% [%s]", (seq_tot==0)?-1:(seq_m3*100)/seq_tot, A->name[s1]); fprintf ( average, "\n\tAVERAGE iRMSD: %6.2f Angs [%s]", (seq_tot==0)?-1:seq_m4/seq_tot, A->name[s1]); fprintf ( average, "\n\tAVERAGE NiRMS: %6.2f Angs [%s]", (seq_tot==0)?-1:(seq_m4*seq_len)/(seq_tot*seq_tot), A->name[s1]); if ( strm (PP->color_mode, "apdb"))ST->score_seq[s1]=(seq_tot==0)?-1:(seq_m3*100)/pair_tot; if (PP->print_rapdb)fprintf (average, "\n\tRAPDB PAIRS AVERAGE N_NONEMPTY_PAIRS %d N_MAXIMUM_PAIRS %d", (int)pair_tot, (int)pair_len); if ( strm (PP->color_mode, "irmsd"))ST->score_seq[s1]=(seq_tot==0)?-1:10*((seq_m4*pair_len)/(seq_tot*seq_tot)); } fprintf ( total, "\n\n#TOTAL for the Full MSA"); fprintf ( total, "\n\tTOTAL EVALUATED: %6.2f %% ", (msa_len==0)?-1:(msa_tot*100)/msa_len); fprintf ( total, "\n\tTOTAL APDB: %6.2f %% ", (msa_tot==0)?-1:(msa_m3*100)/msa_tot); fprintf ( total, "\n\tTOTAL iRMSD: %6.2f Angs", (msa_tot==0)?-1:msa_m4/msa_tot); fprintf ( total, "\n\tTOTAL NiRMSD: %6.2f Angs", (msa_tot==0)?-1:(msa_m4*msa_len)/(msa_tot*msa_tot)); if (PP->print_rapdb)fprintf (total, "\n\tRAPDB PAIRS TOTAL N_NONEMPTY_PAIRS: %d N_MAXIMUM_PAIRS %d", (int)msa_tot, (int)msa_len); if ( strm (PP->color_mode, "apdb")) ST->score_aln=ST->score=A->score_aln=A->score=(msa_tot==0)?-1:(msa_m3*100)/msa_tot; if ( strm (PP->color_mode, "irmsd"))ST->score_aln=ST->score=A->score_aln=A->score=(msa_tot==0)?-1:10*((msa_m4*msa_len)/(msa_tot*msa_tot)); vfclose (average);vfclose (total); vfclose (pairwise);if (PP->irmsd_graph)vfclose (irmsd_graph); fp=display_file_content (fp, pairwise_file); fp=display_file_content (fp, average_file); fp=display_file_content (fp, total_file); if ( PP->irmsd_graph)fp=display_file_content (fp, irmsd_file); fprintf ( fp, "\n\n# EVALUATED: Fraction of Pairwise Columns Evaluated\n"); fprintf ( fp, "# APDB: Fraction of Correct Columns according to APDB\n"); fprintf ( fp, "# iRMDS: Average iRMSD over all evaluated columns\n"); fprintf ( fp, "# NiRMDS: iRMSD*MIN(L1,L2)/Number Evaluated Columns\n"); fprintf ( fp, "# Main Parameter: -maximum_distance %.2f Angstrom\n", PP->maximum_distance); fprintf ( fp, "# Undefined values are set to -1 and indicate LOW Alignment Quality\n"); fp=print_program_information (fp, NULL); /*Color Output*/ for (iRMSD_max=0,iRMSD_min=10000,s1=0; s1nseq; s1++) for ( s2=0; s2< A->nseq; s2++) for (p=0; plen_aln; p++) { if ( residues[s1][s2][p][4]>0) { iRMSD_max=MAX(iRMSD_max, residues[s1][s2][p][4]); iRMSD_min=MAX(iRMSD_min, residues[s1][s2][p][4]); } } iRMSD_unit=iRMSD_max/8; for (p=0; p< A->len_aln; p++) for ( s1=0; s1< A->nseq; s1++) { for ( p=0; p< A->len_aln; p++) { r1=A->seq_al[s1][p]; b=pos[s1][p]-1; if ( is_gap(r1) || !(CL->T[A->order[s1][0]])) ST->seq_al[s1][p]=NO_COLOR_RESIDUE; else { float tot_m2=0, tot_m4=0, v=0; seq_m2=seq_m4=0; for (s2=0; s2< A->nseq; s2++) { r2=A->seq_al[s1][p]; if ( s1==s2) continue; if (is_gap(r2) || !(CL->T[A->order[s1][0]]) || residues[s1][s2][b][0]==0)continue; seq_m2+=m2=(residues[s1][s2][b][2]*100)/residues[s1][s2][b][0]; tot_m2++; m4=residues[s1][s2][b][4]; if (m4>=0) { seq_m4+=m4; tot_m4++; } } if (strm ( PP->color_mode, "apdb")) { if (tot_m2==0)v=NO_COLOR_RESIDUE; else v=MIN((seq_m2/(10*tot_m2)),9); } else if ( strm (PP->color_mode, "irmsd")) { if ( tot_m4==0)v=NO_COLOR_RESIDUE; else v=(8-(int)((seq_m4/(iRMSD_unit*tot_m4))))+1; } ST->seq_al[s1][p]=v; } } } for ( p=0; plen_aln; p++) ST->seq_al[A->nseq][p]=NO_COLOR_RESIDUE; ST->generic_comment=vcalloc ( 100, sizeof (int)); if ( strm (PP->color_mode, "apdb")) { sprintf ( ST->generic_comment, "# APDB Evaluation: Color Range Blue-[0 %% -- 100 %%]-Red\n# Sequence Score: APDB\n# Local Score: APDB\n\n"); } else if ( strm (PP->color_mode, "irmsd")) { sprintf ( ST->generic_comment, "\n# iRMSD Evaluation:\n# Sequence score: NiRMSD (Angstrom*10)\n# Local Score: iRMSD, Blue-[%.2f Ang. -- 0.00 Ang.]-Red \n", iRMSD_max); } fprintf ( fp, "\n"); vfclose (fp); free_int (pos, -1); return ST; } float **** analyse_pdb_residues ( Alignment *A, Constraint_list *CL, Pdb_param *pdb_param) { int **pos; int s1, s2, rs1, rs2; int col1, col2; float ****distances; /*Distances[Nseq][len_aln][4] distances...[0]: Number of residues within the bubble distances...[1]: Absolute difference of distance of residues within Bubble distances...[2]: Number of residues within the bubble with Delta dist < md_threshold distances ..[3]: Sum of squared difference of distances distances ..[4]: iRMSD */ float d1, d2,delta; int wd1, wd2; int in_bubble=0; int real_res1_col1=0; int real_res1_col2; int real_res2_col1; int real_res2_col2; Pdb_param *PP; int print_rapdb; float nrapdb, rapdb; Alignment *BA=NULL; PP=pdb_param; print_rapdb=PP->print_rapdb; distances=declare_arrayN(4, sizeof (float), A->nseq, A->nseq, 0, 0); /*Pre-computation of the internal distances----> T[seq]->ca_dist[len][len]*/ /*Can be avoided if distance_on_request set to 1 */ for ( s1=0; s1< A->nseq; s1++) { rs1=A->order[s1][0]; if (CL->T[rs1] && !(CL->T[rs1])->ca_dist)(CL->T[rs1])->ca_dist=measure_ca_distances(CL->T[rs1]); for ( s2=0; s2< A->nseq; s2++) { distances[s1][s2]=declare_float ( A->len_aln, 6); } } pos=aln2pos_simple (A, A->nseq); for ( s1=0; s1< A->nseq; s1++) for ( col1=0; col1< A->len_aln; col1++) for ( s2=0; s2nseq; s2++) { rs1=A->order[s1][0]; rs2=A->order[s2][0]; rapdb=0; nrapdb=0; if ( s1==s2) continue; else if (!(CL->T[rs1]) || !(CL->T[rs2]))continue; else if ( islower (A->seq_al[s1][col1]) || islower ( A->seq_al[s2][col1]))continue; else if ( pos[s1][col1]<=0 || pos[s2][col1]<=0 ) continue; if ( print_rapdb && s2>s1) { fprintf ( stdout, "RAPDB S1: %s S2: %s POS %d %d %c %d %c ", A->name[s1], A->name[s2], col1+1, pos[s1][col1],A->seq_al[s1][col1], pos[s2][col1],A->seq_al[s2][col1]); BA=copy_aln (A, BA); lower_string (BA->seq_al[s1]); lower_string (BA->seq_al[s2]); BA->seq_al[s1][col1]=toupper (BA->seq_al[s1][col1]); BA->seq_al[s2][col1]=toupper (BA->seq_al[s2][col1]); } for ( col2=0; col2len_aln; col2++) { if (pos[s1][col2]<=0 || pos[s2][col2]<=0 )continue; else if ( FABS((pos[s1][col2]-pos[s1][col1]))<=PP->n_excluded_nb)continue; else if ( FABS((pos[s2][col2]-pos[s2][col1]))<=PP->n_excluded_nb)continue; else if ( islower (A->seq_al[s1][col2]) || islower ( A->seq_al[s2][col2]))continue; real_res1_col1=pos[s1][col1]-1; real_res1_col2=pos[s1][col2]-1; real_res2_col1=pos[s2][col1]-1; real_res2_col2=pos[s2][col2]-1; d1=(CL->T[rs1])->ca_dist[real_res1_col1][real_res1_col2]; d2=(CL->T[rs2])->ca_dist[real_res2_col1][real_res2_col2]; if ( d1==UNDEFINED || d2 == UNDEFINED) continue; if ( strm ( PP->local_mode, "sphere")) { in_bubble= (d1maximum_distance && d2maximum_distance)?1:0; ; } else if ( strm ( PP->local_mode, "window")) { wd1=FABS((pos[s1][col2]-pos[s1][col1])); wd2=FABS((pos[s2][col2]-pos[s2][col1])); in_bubble= (wd1maximum_distance && wd2maximum_distance)?1:0; ; } if (in_bubble) { if ( print_rapdb && s2 >s1) { fprintf ( stdout, "NB %d %d %c %d %c ", col2, pos[s1][col2], A->seq_al[s1][col2], pos[s2][col2], A->seq_al[s2][col2]); BA->seq_al[s1][col2]=toupper (BA->seq_al[s1][col2]); BA->seq_al[s2][col2]=toupper (BA->seq_al[s2][col2]); } delta=FABS((d1-d2)); if (deltamd_threshold) distances[s1][s2][real_res1_col1][2]++; distances[s1][s2][real_res1_col1][1]+=delta; distances[s1][s2][real_res1_col1][0]++; distances[s1][s2][real_res1_col1][3]+=delta*delta; nrapdb++; rapdb+=delta*delta; } } if ( nrapdb==0)distances[s1][s2][real_res1_col1][4]=-1; else distances[s1][s2][real_res1_col1][4]=(float)sqrt((double)(rapdb/nrapdb)); if ( print_rapdb && s2>s1) { if (nrapdb==0) { fprintf ( stdout, "APDB: UNDEFINED\n"); } else { fprintf ( stdout, " APDB: %.2f ",(float)sqrt((double)(rapdb/nrapdb))); BA->residue_case=KEEP_CASE;unalign_residues (BA, s1, s2); fprintf ( stdout,"SEQ1: %s %s SEQ2: %s %s\n", BA->name[s1], BA->seq_al[s1], BA->name[s2], BA->seq_al[s2]); } } } free_aln (BA); free_int (pos, -1); return distances; } int pair_res_suitable4trmsd (int s1,int col1, int col2, Alignment *A, int **pos, Pdb_param *PP, Constraint_list *CL,int *s); int aln_column_contains_gap (Alignment *A, int c); float aln2ncol4trmsd(Alignment *A, int **pos, Constraint_list *CL, int **lc); int pair_columns_suitable4trmsd(int col1, int col2, Alignment *A, int **pos, Pdb_param *PP, Constraint_list *CL,int *s); int column_is_suitable4trmsd(int col1,Alignment *A, int **pos, Pdb_param *PP, Constraint_list *CL,int *s); NT_node trmsdmat2tree (float **dm, int **count,Alignment *A); Alignment * msa2struc_dist ( Alignment *A, Alignment *ST, char *results, int gapped, int min_ncol4trmsd) { int **pos, c; FILE *tl; int s1, s2, rs1, rs2; int col1, col2; float ****distances; float **dm,**tdm; int **count,**tcount; int print_subtrees=0; float min, max; /*Distances[Nseq][len_aln][4] distances...[0]: Number of residues within the bubble distances...[1]: Absolute difference of distance of residues within Bubble distances...[2]: Number of residues within the bubble with Delta dist < md_threshold distances ..[3]: Sum of squared difference of distances distances ..[4]: iRMSD */ Pdb_param *pdb_param; Constraint_list *CL; int a, b, ncol, npos,n; float d1, d2,delta; int wd1, wd2; int in_bubble=0; int real_res1_col1=0; int real_res1_col2; int real_res2_col1; int real_res2_col2; Pdb_param *PP; int print_rapdb; float nrapdb, rapdb; Alignment *BA=NULL; NT_node *T0,*T1,*T2,*PT, *POS; NT_node BT0, BT10,BT50, BT100=NULL,RBT; char **pair_pos_list; int ntree=0, ntree2; Alignment *B; char *pos_list; char *tot_pos_list; char *struc_tree10; char *struc_tree100; char *struc_tree50; char *struc_tree0; char *consense_file; char *color_struc_tree; int **score; int proceed=1; int **lc; int used; if (min_ncol4trmsd<0) { min_ncol4trmsd*=-1; min_ncol4trmsd=(min_ncol4trmsd*A->len_aln)/100; } else if ( min_ncol4trmsd>=A->len_aln) { min_ncol4trmsd=A->len_aln-1; } lc=declare_int (A->nseq, 2); for (a=0; anseq; a++)lc[a][0]=a; declare_name(tot_pos_list); sprintf ( tot_pos_list, "%s.struc_tree.list", results); declare_name(consense_file); sprintf (consense_file, "%s.struc_tree.consense_output", results); declare_name(pos_list); sprintf ( pos_list, "%s.pos_list", results); declare_name(struc_tree0); sprintf ( struc_tree0, "%s.struc_tree.consensus",results); declare_name(struc_tree10); sprintf ( struc_tree10, "%s.struc_tree10",results); declare_name(struc_tree100); sprintf ( struc_tree100, "%s.struc_tree100",results); declare_name(struc_tree50); sprintf ( struc_tree50, "%s.struc_tree50",results); declare_name(color_struc_tree); sprintf ( color_struc_tree, "%s.struc_tree.html", results); pair_pos_list=declare_char (A->len_aln*A->len_aln+1, 100); T1=vcalloc (A->len_aln*A->len_aln+1, sizeof (NT_node)); T2=vcalloc (A->len_aln+1, sizeof (NT_node)); PT=vcalloc (A->len_aln*A->len_aln+1, sizeof (NT_node)); POS=vcalloc (A->len_aln+1, sizeof (NT_node)); CL=A->CL; //Check all sequences have a PDB structure for (used=0,a=0; anseq; a++) { if ( ! seq2P_template_file(A->S,a)) { add_warning (stderr, "Sequence %s removed from the dataset [No Usable Structure]", A->name[a]); } else { if (used!=a) { sprintf (A->name[used], "%s", A->name[a]); sprintf (A->seq_al[used], "%s", A->seq_al[a]); for (b=0; b<4; b++)A->order[used][b]=A->order[a][b]; } used++; } } A->nseq=used; if (A->nseq<2)myexit (fprintf_error(stderr, "Two sequences at least must have a known structure")); for ( s1=0; s1< (A->S)->nseq; s1++) if ( CL->T[s1]){PP=(CL->T[s1])->pdb_param;break;} for ( s1=0; s1< A->nseq; s1++) { rs1=A->order[s1][0]; if (CL->T[rs1] && !(CL->T[rs1])->ca_dist)(CL->T[rs1])->ca_dist=measure_ca_distances(CL->T[rs1]); } pos=aln2pos_simple (A, A->nseq); dm=declare_float (A->nseq, A->nseq); count=declare_int (A->nseq, A->nseq); tdm=declare_float (A->nseq, A->nseq); tcount=declare_int (A->nseq, A->nseq); PP->maximum_distance=1000; sprintf ( PP->local_mode, "sphere"); while ((npos=aln2ncol4trmsd(A,pos,CL,lc))nseq>1) { sort_int_inv (lc,2, 1, 0,A->nseq-1); add_information (stderr, "Remove Sequence [%s] that contains %d un-suitable positions", A->name[lc[0][0]], lc[0][1]); A=remove_seq_from_aln (A, A->name[lc[0][0]]); ungap_aln (A); pos=aln2pos_simple (A, A->nseq); } if (!A->nseq) { myexit (fprintf_error(stderr,"No suitable pair of column supporting a tree")); } else fprintf ( stderr, "\n---- Number of usable positions: %d [%.2f %%]\n", npos, ((float)npos*100)/(float)A->len_aln); tl=vfopen (tot_pos_list, "w"); for (ncol=0,ntree=0, col1=0; col1< A->len_aln; col1++) { int w,tree, cont; //output_completion (stderr, col1, A->len_aln,1, "Sample Columns"); if (!gapped && aln_column_contains_gap (A, col1))continue; for ( cont=1,ntree2=0,col2=0; col2len_aln; col2++) { for (s1=0; s1< A->nseq-1; s1++) { rs1=A->order[s1][0]; if (!pair_res_suitable4trmsd (s1,col1, col2, A, pos, PP, CL, &w))continue; for ( s2=s1+1; s2nseq; s2++) { if (!pair_res_suitable4trmsd (s2,col1, col2, A, pos, PP, CL, &w))continue; rs2=A->order[s2][0]; real_res1_col1=pos[s1][col1]-1; real_res1_col2=pos[s1][col2]-1; real_res2_col1=pos[s2][col1]-1; real_res2_col2=pos[s2][col2]-1; d1=(CL->T[rs1])->ca_dist[real_res1_col1][real_res1_col2]; d2=(CL->T[rs2])->ca_dist[real_res2_col1][real_res2_col2]; delta=FABS((d1-d2)); dm[s1][s2]=dm[s2][s1]+=delta; tdm[s1][s2]=tdm[s2][s1]+=delta; tcount[s1][s2]++; tcount[s2][s1]++; count[s1][s2]++; count[s2][s1]++; } } } if ((POS[col1]=trmsdmat2tree (dm, count, A))) { T1[ntree]=POS[col1]; fprintf (tl, "\n>Tree_%d Column\n", col1+1); print_tree (T1[ntree], "newick", tl); ntree++; } } vfclose (tl); if (!ntree){fprintf ( stderr, "\nERROR: No suitable pair of column supporting a tree [FATAL]\n"); exit (EXIT_SUCCESS);} score=treelist2avg_treecmp (T1, NULL); display_output_filename( stderr,"TreeList","newick",tot_pos_list, CHECK); if (treelist_file2consense (tot_pos_list, NULL, consense_file)) { display_output_filename( stderr,"ConsenseTree","phylip",consense_file, CHECK); } else { fprintf ( stderr, "\nPhylip is not installed: the program could not produce the consense output. This is not mandatory but useful"); } //consensus tree if ((BT100=treelist2filtered_bootstrap (T1, NULL,score, 1.0))) { vfclose (print_tree (BT100,"newick", vfopen (struc_tree0, "w"))); display_output_filename( stderr,"Tree","newick",struc_tree0, CHECK); } if (print_subtrees) { if ( (BT0=trmsdmat2tree (tdm, tcount, A))) { vfclose (print_tree (BT0,"newick", vfopen (struc_tree0, "w"))); display_output_filename( stderr,"Tree","newick",struc_tree0, CHECK); } if ((BT10=treelist2filtered_bootstrap (T1, NULL,score, 0.1))) { vfclose (print_tree (BT10,"newick", vfopen (struc_tree10, "w"))); display_output_filename( stderr,"Tree","newick",struc_tree10, CHECK); } if ((BT50=treelist2filtered_bootstrap (T1, NULL, score,0.5))) { vfclose (print_tree (BT50,"newick", vfopen (struc_tree50, "w"))); display_output_filename( stderr,"Tree","newick",struc_tree50, CHECK); } } if (!BT100)BT100=treelist2filtered_bootstrap (T1, NULL,score, 1.0); RBT=BT100; if (RBT) { B=copy_aln (A, NULL); for (a=0; alen_aln; a++) { int score; Tree_sim *S=NULL; if (POS[a]) { S=tree_cmp (POS[a], RBT); score=S->uw/10; } else { score=NO_COLOR_RESIDUE; } for (b=0; bnseq; b++) { if ( is_gap (B->seq_al[b][a]) || score == NO_COLOR_RESIDUE) { B->seq_al[b][a]=NO_COLOR_RESIDUE; } else { B->seq_al[b][a]=S->uw/10; } } if (S)vfree (S); } output_format_aln ("score_html", A,B,color_struc_tree); display_output_filename( stderr,"Colored MSA","score_html",color_struc_tree, CHECK); free_aln (BA); fprintf ( stderr, "\n"); } fprintf ( stderr, "\n"); free_int (pos, -1); exit (EXIT_SUCCESS); return NULL; } NT_node trmsdmat2tree (float **dm, int **count,Alignment *A) { float min, max; int s1, s2; NT_node T; int ns; int **dm_int; ns=A->nseq; for (s1=0; s1nseq-1; s1++) for (s2=s1+1; s2nseq; s2++) { dm_int[s1][s2]=dm_int[s2][s1]=((dm[s1][s2])/(max))*100; } T=compute_std_tree_2(A, dm_int, "_TMODE_upgma"); free_int (dm_int, -1); for (s1=0; s1order[s1][0]; if ( !(CL->T[rs])){s[0]=s1; return 0;} else if (is_gap (A->seq_al[s1][col1])){s[0]=s1;return 0;} else if (is_gap (A->seq_al[s1][col2])){s[0]=s1;return 0;} else if (islower(A->seq_al[s1][col1])){s[0]=s1; return 0;} else if (islower(A->seq_al[s1][col2])){s[0]=s1; return 0;} else if ( FABS(((pos[s1][col2])-(pos[s1][col1])))<=PP->n_excluded_nb){s[0]=s1;return 0;} else if ((CL->T[rs])->ca_dist[pos[s1][col1]-1][pos[s1][col2]-1]==UNDEFINED){s[0]=s1;return 0;} return 1; } int pair_columns_suitable4trmsd(int col1, int col2, Alignment *A, int **pos, Pdb_param *PP, Constraint_list *CL,int *s) { int s1; if (!column_is_suitable4trmsd (col1, A, pos, PP, CL,s))return 0; if (!column_is_suitable4trmsd (col2, A, pos, PP, CL,s))return 0; for (s1=0; s1nseq; s1++) { int rs, rr1, rr2; rs=A->order[s1][0]; if ( FABS(((pos[s1][col2])-(pos[s1][col1])))<=PP->n_excluded_nb){s[0]=s1;return 0;} if ((CL->T[rs])->ca_dist[pos[s1][col1]-1][pos[s1][col2]-1]==UNDEFINED){s[0]=s1;return 0;} rr1=pos[s1][col1]-1; rr2=pos[s1][col2]-1; if ((CL->T[rs])->ca_dist[rr1][rr2]>PP->maximum_distance){s[0]=s1;return 0;} } return 1; } int column_is_suitable4trmsd(int col1,Alignment *A, int **pos, Pdb_param *PP, Constraint_list *CL,int *s) { int s1; for ( s1=0; s1nseq; s1++) { int rs; rs=A->order[s1][0]; if ( !(CL->T[rs])){s[0]=s1; return 0;} else if (is_gap (A->seq_al[s1][col1])){s[0]=s1;return 0;} else if (islower(A->seq_al[s1][col1])){s[0]=s1; return 0;} } return 1; } int aln_column_contains_gap (Alignment *A, int c) { int a, b; if ( !A || c>=A->len_aln || c<0) { printf ( "\nERROR: values out of range in aln_column_contains_gap [FATL:%s]\n", PROGRAM); exit (EXIT_FAILURE); } for ( a=0; anseq; a++) if ( is_gap(A->seq_al[a][c]))return 1; return 0; } float aln2ncol4trmsd(Alignment *A, int **pos, Constraint_list *CL, int **lc) { //This function estimates the number of columns suitable for constructing a trmsd int col1, s1, ncol, n, rs1, real_res1_col1; for (s1=0; s1nseq; s1++){lc[s1][0]=s1; lc[s1][1]=0;} for (ncol=0,col1=0; col1< A->len_aln; col1++) { for (n=0,s1=0; s1nseq; s1++) { real_res1_col1=pos[s1][col1]-1; rs1=A->order[s1][0]; if (real_res1_col1<0)lc[s1][1]++; else if (!((CL->T[A->order[s1][0]])->ca[real_res1_col1]))lc[s1][1]++; else n++; } if (n==A->nseq) { ncol++; } } return ncol; } float square_atom ( Atom *X) { return X->x*X->x + X->y*X->y + X->z*X->z; } Atom* reframe_atom ( Atom *X, Atom*Y, Atom *Z, Atom *IN, Atom *R) { float new_x, new_y, new_z; if ( R==NULL)R=vcalloc ( 1, sizeof (Atom)); new_x= X->x*IN->x + Y->x*IN->y +Z->x*IN->z; new_y= X->y*IN->x + Y->y*IN->y +Z->y*IN->z; new_z= X->z*IN->x + Y->z*IN->y +Z->z*IN->z; R->x=new_x; R->y=new_y; R->z=new_z; return R; } Atom* add_atom ( Atom *A, Atom*B, Atom *R) { if ( R==NULL)R=vcalloc ( 1, sizeof (Atom)); R->x=A->x+B->x; R->y=A->y+B->y; R->z=A->z+B->z; return R; } Atom* diff_atom ( Atom *A, Atom*B, Atom *R) { if ( R==NULL)R=vcalloc ( 1, sizeof (Atom)); R->x=A->x-B->x; R->y=A->y-B->y; R->z=A->z-B->z; return R; } Atom * copy_atom ( Atom *A, Atom*R) { if ( R==NULL)R=vcalloc ( 1, sizeof (Atom)); R->num=A->num; R->res_num=A->res_num; R->x=A->x; R->y=A->y; R->z=A->z; sprintf( R->type, "%s", A->type); return R; } void print_atom (Atom *A) { fprintf ( stdout, "%.2f %.2f %.2f", A->x, A->y, A->z); } /************************************************************************/ /* */ /* NUSSINOV */ /* */ /************************************************************************/ /*---------prototypes ----------*/ static void computeBasePairMatrix(int**M,char*S,int l, int T); static int backtrack(int a,int b,int**M,char*S,char*P, int T); static int basePair(char x, char y) { static short **mat; if (!mat) { char alp[20]; int a, b, c1, c2, lc1, lc2; mat=declare_short (256, 256); sprintf ( alp, "AGCTUagctu"); for (a=0; amax ){ max = numBasePairs[i][j-1]; index = n; // j not basepaired with some k such that i max ){ max = val; index=i; } for(k=i; k<=j-THRESHOLD; k++){ val = basePair(S[k],S[j]) + numBasePairs[i][k-1] + numBasePairs[k+1][j-1]; if (val > max) { max = val; index=k; } } numBasePairs[i][j]=max; if (index