--- /dev/null
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <stdarg.h>
+#include <ctype.h>
+#include <string.h>
+
+#include "io_lib_header.h"
+#include "util_lib_header.h"
+#include "define_header.h"
+#include "dp_lib_header.h"
+void print_atom ( Atom*A);
+
+float **** quantile_apdb_filtration ( Alignment *A, float ****residues, Constraint_list *CL,Pdb_param *PP, FILE *fp);
+float **** irmsdmin_apdb_filtration ( Alignment *A, float ****residues, Constraint_list *CL,Pdb_param *PP, FILE *fp);
+int apdb ( int argc, char *argv[])
+ {
+
+ Constraint_list *CL=NULL;
+ Sequence *S=NULL;
+ Alignment *A=NULL;
+ Alignment *EA=NULL;
+ Pdb_param *pdb_param;
+
+ Fname *F=NULL;
+ char *file_name;
+ int a,c;
+
+ int n_pdb;
+
+/*PARAMETERS VARIABLES*/
+ int garbage;
+ char *parameters;
+ FILE *fp_parameters;
+
+ int quiet;
+ char *se_name;
+ FILE *le=NULL;
+
+ char **list_file;
+ int n_list;
+ char **struc_to_use;
+ int n_struc_to_use;
+
+ char *aln;
+ char *repeat_seq;
+ char *repeat_pdb;
+
+ char *color_mode;
+ char *comparison_io;
+
+ int n_excluded_nb;
+
+ float maximum_distance;
+ float similarity_threshold;
+ float md_threshold;
+
+
+ int print_rapdb;
+
+ char *outfile;
+ char *run_name;
+
+ char *apdb_outfile;
+ char *cache;
+
+ char **out_aln_format;
+ int n_out_aln_format;
+
+ char *output_res_num;
+ char *local_mode;
+ float filter;
+ int filter_aln;
+ int irmsd_graph;
+ int nirmsd_graph;
+ int n_template_file;
+ char **template_file_list;
+ char *mode;
+ int prot_min_sim;
+ int prot_max_sim;
+ int prot_min_cov;
+ int pdb_min_sim;
+ int pdb_max_sim;
+ int pdb_min_cov;
+ int gapped;
+
+
+ char *prot_blast_server;
+ char *pdb_blast_server;
+
+
+ char *pdb_db;
+ char *prot_db;
+ int min_ncol;
+
+ argv=standard_initialisation (argv, &argc);
+
+/*PARAMETER PROTOTYPE: READ PARAMETER FILE */
+ declare_name (parameters);
+ get_cl_param(\
+ /*argc*/ argc ,\
+ /*argv*/ argv ,\
+ /*output*/ &le ,\
+ /*Name*/ "-parameters" ,\
+ /*Flag*/ &garbage ,\
+ /*TYPE*/ "R_F" ,\
+ /*OPTIONAL?*/ OPTIONAL ,\
+ /*MAX Nval*/ 1 ,\
+ /*DOC*/ "Read the files in the parameter file" ,\
+ /*Parameter*/ ¶meters ,\
+ /*Def 1*/ "NULL" ,\
+ /*Def 2*/ "stdin" ,\
+ /*Min_value*/ "any" ,\
+ /*Max Value*/ "any" \
+ );
+ if ( parameters && parameters[0])
+ {
+ argv[argc]=vcalloc ( VERY_LONG_STRING, sizeof(char));
+ a=0;
+ fp_parameters=vfopen (parameters, "r");
+ while ((c=fgetc (fp_parameters))!=EOF)argv[1][a++]=c;
+ vfclose (fp_parameters);
+ argv[argc][a]='\0';
+ argc++;
+ argv=break_list ( argv, &argc, "=:;, \n");
+ }
+/*PARAMETER PROTOTYPE*/
+ declare_name (se_name);
+ get_cl_param(\
+ /*argc*/ argc ,\
+ /*argv*/ argv ,\
+ /*output*/ &le ,\
+ /*Name*/ "-quiet" ,\
+ /*Flag*/ &quiet ,\
+ /*TYPE*/ "W_F" ,\
+ /*OPTIONAL?*/ OPTIONAL ,\
+ /*MAX Nval*/ 1 ,\
+ /*DOC*/ "ND" ,\
+ /*Parameter*/ &se_name ,\
+ /*Def 1*/ "stderr" ,\
+ /*Def 2*/ "/dev/null" ,\
+ /*Min_value*/ "any" ,\
+ /*Max Value*/ "any" \
+ );
+
+ le=vfopen ( se_name, "w");
+ fprintf ( le, "\nPROGRAM: %s\n",argv[0]);
+
+/*PARAMETER PROTOTYPE: IN */
+ list_file=declare_char ( 200, STRING);
+ n_list=get_cl_param(\
+ /*argc*/ argc ,\
+ /*argv*/ argv ,\
+ /*output*/ &le ,\
+ /*Name*/ "-in" ,\
+ /*Flag*/ &garbage ,\
+ /*TYPE*/ "S" ,\
+ /*OPTIONAL?*/ OPTIONAL ,\
+ /*MAX Nval*/ 200 ,\
+ /*DOC*/ "ND" ,\
+ /*Parameter*/ list_file ,\
+ /*Def 1*/ "",\
+ /*Def 2*/ "stdin" ,\
+ /*Min_value*/ "any" ,\
+ /*Max Value*/ "any" \
+ );
+/*PARAMETER PROTOTYPE: IN */
+ struc_to_use=declare_char ( 200, STRING);
+ n_struc_to_use=get_cl_param(\
+ /*argc*/ argc ,\
+ /*argv*/ argv ,\
+ /*output*/ &le ,\
+ /*Name*/ "-struc_to_use" ,\
+ /*Flag*/ &garbage ,\
+ /*TYPE*/ "S" ,\
+ /*OPTIONAL?*/ OPTIONAL ,\
+ /*MAX Nval*/ 200 ,\
+ /*DOC*/ "ND" ,\
+ /*Parameter*/ struc_to_use ,\
+ /*Def 1*/ "",\
+ /*Def 2*/ "stdin" ,\
+ /*Min_value*/ "any" ,\
+ /*Max Value*/ "any" \
+ );
+
+/*PARAMETER PROTOTYPE: COMPARISON IO */
+ declare_name (comparison_io);
+ get_cl_param(\
+ /*argc*/ argc ,\
+ /*argv*/ argv ,\
+ /*output*/ &le ,\
+ /*Name*/ "-io_format" ,\
+ /*Flag*/ &garbage ,\
+ /*TYPE*/ "S" ,\
+ /*OPTIONAL?*/ OPTIONAL ,\
+ /*MAX Nval*/ 200 ,\
+ /*DOC*/ "ND" ,\
+ /*Parameter*/ &comparison_io,\
+ /*Def 1*/ "hsgd0123456",\
+ /*Def 2*/ "" ,\
+ /*Min_value*/ "any" ,\
+ /*Max Value*/ "any" \
+ );
+/*PARAMETER PROTOTYPE: ALN */
+ declare_name (aln);
+ get_cl_param(\
+ /*argc*/ argc ,\
+ /*argv*/ argv ,\
+ /*output*/ &le ,\
+ /*Name*/ "-aln" ,\
+ /*Flag*/ &garbage ,\
+ /*TYPE*/ "S" ,\
+ /*OPTIONAL?*/ OPTIONAL ,\
+ /*MAX Nval*/ 1 ,\
+ /*DOC*/ "ND" ,\
+ /*Parameter*/ &aln,\
+ /*Def 1*/ "",\
+ /*Def 2*/ "stdin" ,\
+ /*Min_value*/ "any" ,\
+ /*Max Value*/ "any" \
+ );
+/*PARAMETER PROTOTYPE: ALN */
+
+ declare_name (repeat_seq);
+ get_cl_param(\
+ /*argc*/ argc ,\
+ /*argv*/ argv ,\
+ /*output*/ &le ,\
+ /*Name*/ "-repeat_seq" ,\
+ /*Flag*/ &garbage ,\
+ /*TYPE*/ "S" ,\
+ /*OPTIONAL?*/ OPTIONAL ,\
+ /*MAX Nval*/ 1 ,\
+ /*DOC*/ "ND" ,\
+ /*Parameter*/ &repeat_seq,\
+ /*Def 1*/ "",\
+ /*Def 2*/ "stdin" ,\
+ /*Min_value*/ "any" ,\
+ /*Max Value*/ "any" \
+ );
+
+/*PARAMETER PROTOTYPE: ALN */
+ declare_name (repeat_pdb);
+ get_cl_param(\
+ /*argc*/ argc ,\
+ /*argv*/ argv ,\
+ /*output*/ &le ,\
+ /*Name*/ "-repeat_pdb" ,\
+ /*Flag*/ &garbage ,\
+ /*TYPE*/ "S" ,\
+ /*OPTIONAL?*/ OPTIONAL ,\
+ /*MAX Nval*/ 1 ,\
+ /*DOC*/ "ND" ,\
+ /*Parameter*/ &repeat_pdb,\
+ /*Def 1*/ "",\
+ /*Def 2*/ "stdin" ,\
+ /*Min_value*/ "any" ,\
+ /*Max Value*/ "any" \
+ );
+
+/*PARAMETER PROTOTYPE: Nb to exclude */
+ get_cl_param(\
+ /*argc*/ argc ,\
+ /*argv*/ argv ,\
+ /*output*/ &le ,\
+ /*Name*/ "-n_excluded_nb" ,\
+ /*Flag*/ &garbage ,\
+ /*TYPE*/ "D" ,\
+ /*OPTIONAL?*/ OPTIONAL ,\
+ /*MAX Nval*/ 1 ,\
+ /*DOC*/ "Exclude the N Nb on each side of the central residue. -1 triggers an automatic setting equal to the window size corresponding to the sphere" ,\
+ /*Parameter*/ &n_excluded_nb ,\
+ /*Def 1*/ "-1" ,\
+ /*Def 2*/ "1" ,\
+ /*Min_value*/ "any" ,\
+ /*Max Value*/ "any" \
+ );
+/*PARAMETER PROTOTYPE: diatances to count */
+ get_cl_param(\
+ /*argc*/ argc ,\
+ /*argv*/ argv ,\
+ /*output*/ &le ,\
+ /*Name*/ "-similarity_threshold" ,\
+ /*Flag*/ &garbage ,\
+ /*TYPE*/ "F" ,\
+ /*OPTIONAL?*/ OPTIONAL ,\
+ /*MAX Nval*/ 1 ,\
+ /*DOC*/ "ND" ,\
+ /*Parameter*/ &similarity_threshold,\
+ /*Def 1*/ "70" ,\
+ /*Def 2*/ "70" ,\
+ /*Min_value*/ "any" ,\
+ /*Max Value*/ "any" \
+ );
+/*PARAMETER PROTOTYPE: diatances to count */
+ get_cl_param(\
+ /*argc*/ argc ,\
+ /*argv*/ argv ,\
+ /*output*/ &le ,\
+ /*Name*/ "-filter" ,\
+ /*Flag*/ &garbage ,\
+ /*TYPE*/ "F" ,\
+ /*OPTIONAL?*/ OPTIONAL ,\
+ /*MAX Nval*/ 1 ,\
+ /*DOC*/ "Filter by only keeping the best quantile" ,\
+ /*Parameter*/ &filter,\
+ /*Def 1*/ "1.00" ,\
+ /*Def 2*/ "1.00" ,\
+ /*Min_value*/ "-1.00" ,\
+ /*Max Value*/ "1.00" \
+ );
+/*PARAMETER PROTOTYPE: diatances to count */
+ get_cl_param(\
+ /*argc*/ argc ,\
+ /*argv*/ argv ,\
+ /*output*/ &le ,\
+ /*Name*/ "-filter_aln" ,\
+ /*Flag*/ &garbage ,\
+ /*TYPE*/ "D" ,\
+ /*OPTIONAL?*/ OPTIONAL ,\
+ /*MAX Nval*/ 1 ,\
+ /*DOC*/ "Lower Case For Residues Filtered Out" ,\
+ /*Parameter*/ &filter_aln,\
+ /*Def 1*/ "0" ,\
+ /*Def 2*/ "1" ,\
+ /*Min_value*/ "0" ,\
+ /*Max Value*/ "1" \
+ );
+/*PARAMETER PROTOTYPE: diatances to count */
+ get_cl_param(\
+ /*argc*/ argc ,\
+ /*argv*/ argv ,\
+ /*output*/ &le ,\
+ /*Name*/ "-irmsd_graph" ,\
+ /*Flag*/ &garbage ,\
+ /*TYPE*/ "D" ,\
+ /*OPTIONAL?*/ OPTIONAL ,\
+ /*MAX Nval*/ 1 ,\
+ /*DOC*/ "Outputs the irmsd, position/position" ,\
+ /*Parameter*/ &irmsd_graph,\
+ /*Def 1*/ "0" ,\
+ /*Def 2*/ "1" ,\
+ /*Min_value*/ "0" ,\
+ /*Max Value*/ "1" \
+ );
+/*PARAMETER PROTOTYPE: diatances to count */
+ get_cl_param(\
+ /*argc*/ argc ,\
+ /*argv*/ argv ,\
+ /*output*/ &le ,\
+ /*Name*/ "-nirmsd_graph" ,\
+ /*Flag*/ &garbage ,\
+ /*TYPE*/ "D" ,\
+ /*OPTIONAL?*/ OPTIONAL ,\
+ /*MAX Nval*/ 1 ,\
+ /*DOC*/ "Outputs the NIRMSD VS N Removed Residues Curve" ,\
+ /*Parameter*/ &nirmsd_graph,\
+ /*Def 1*/ "0" ,\
+ /*Def 2*/ "1" ,\
+ /*Min_value*/ "0" ,\
+ /*Max Value*/ "1" \
+ );
+/*PARAMETER PROTOTYPE: -rmsd_threshold */
+ get_cl_param(\
+ /*argc*/ argc ,\
+ /*argv*/ argv ,\
+ /*output*/ &le ,\
+ /*Name*/ "-md_threshold" ,\
+ /*Flag*/ &garbage ,\
+ /*TYPE*/ "F" ,\
+ /*OPTIONAL?*/ OPTIONAL ,\
+ /*MAX Nval*/ 1 ,\
+ /*DOC*/ "ND" ,\
+ /*Parameter*/ &md_threshold ,\
+ /*Def 1*/ "1" ,\
+ /*Def 2*/ "1" ,\
+ /*Min_value*/ "any" ,\
+ /*Max Value*/ "any" \
+ );
+
+/*PARAMETER PROTOTYPE: -maximum distances */
+ get_cl_param(\
+ /*argc*/ argc ,\
+ /*argv*/ argv ,\
+ /*output*/ &le ,\
+ /*Name*/ "-maximum_distance" ,\
+ /*Flag*/ &garbage ,\
+ /*TYPE*/ "F" ,\
+ /*OPTIONAL?*/ OPTIONAL ,\
+ /*MAX Nval*/ 1 ,\
+ /*DOC*/ "ND" ,\
+ /*Parameter*/ &maximum_distance ,\
+ /*Def 1*/ "10" ,\
+ /*Def 2*/ "10" ,\
+ /*Min_value*/ "any" ,\
+ /*Max Value*/ "any" \
+ );
+
+
+/*PARAMETER PROTOTYPE: -print_rapdb */
+ get_cl_param(\
+ /*argc*/ argc ,\
+ /*argv*/ argv ,\
+ /*output*/ &le ,\
+ /*Name*/ "-print_rapdb" ,\
+ /*Flag*/ &garbage ,\
+ /*TYPE*/ "D" ,\
+ /*OPTIONAL?*/ OPTIONAL ,\
+ /*MAX Nval*/ 1 ,\
+ /*DOC*/ "Prints the neighborhood of each pair of aligned residues, along with the associated local score" ,\
+ /*Parameter*/ &print_rapdb ,\
+ /*Def 1*/ "0" ,\
+ /*Def 2*/ "1" ,\
+ /*Min_value*/ "any" ,\
+ /*Max Value*/ "any" \
+ );
+
+/*PARAMETER PROTOTYPE: RUN_NAME */
+ declare_name (run_name);
+ get_cl_param(\
+ /*argc*/ argc ,\
+ /*argv*/ argv ,\
+ /*output*/ &le ,\
+ /*Name*/ "-run_name" ,\
+ /*Flag*/ &garbage ,\
+ /*TYPE*/ "W_F" ,\
+ /*OPTIONAL?*/ OPTIONAL ,\
+ /*MAX Nval*/ 1 ,\
+ /*DOC*/ "ND" ,\
+ /*Parameter*/ &run_name ,\
+ /*Def 1*/ "default" ,\
+ /*Def 2*/ "" ,\
+ /*Min_value*/ "default" ,\
+ /*Max Value*/ "any" \
+ );
+/*PARAMETER PROTOTYPE: OUTFILE */
+/*PARAMETER PROTOTYPE: OUTFILE */
+ declare_name ( outfile);
+ get_cl_param(\
+ /*argc*/ argc ,\
+ /*argv*/ argv ,\
+ /*output*/ &le ,\
+ /*Name*/ "-outfile" ,\
+ /*Flag*/ &garbage ,\
+ /*TYPE*/ "W_F" ,\
+ /*OPTIONAL?*/ OPTIONAL ,\
+ /*MAX Nval*/ 1 ,\
+ /*DOC*/ "ND" ,\
+ /*Parameter*/ &outfile ,\
+ /*Def 1*/ "no" ,\
+ /*Def 2*/ "default" ,\
+ /*Min_value*/ "default" ,\
+ /*Max Value*/ "any" \
+ );
+/*PARAMETER PROTOTYPE: OUTFILE */
+ declare_name ( apdb_outfile);
+ get_cl_param(\
+ /*argc*/ argc ,\
+ /*argv*/ argv ,\
+ /*output*/ &le ,\
+ /*Name*/ "-apdb_outfile" ,\
+ /*Flag*/ &garbage ,\
+ /*TYPE*/ "W_F" ,\
+ /*OPTIONAL?*/ OPTIONAL ,\
+ /*MAX Nval*/ 1 ,\
+ /*DOC*/ "ND" ,\
+ /*Parameter*/ &apdb_outfile ,\
+ /*Def 1*/ "stdout" ,\
+ /*Def 2*/ "default" ,\
+ /*Min_value*/ "any" ,\
+ /*Max Value*/ "any" \
+ );
+
+/*PARAMETER PROTOTYPE: OUTPUT_FORMAT */
+ out_aln_format=declare_char ( 200, STRING);
+ n_out_aln_format=get_cl_param(\
+ /*argc*/ argc ,\
+ /*argv*/ argv ,\
+ /*output*/ &le ,\
+ /*Name*/ "-output" ,\
+ /*Flag*/ &garbage ,\
+ /*TYPE*/ "S" ,\
+ /*OPTIONAL?*/ OPTIONAL ,\
+ /*MAX Nval*/ 200 ,\
+ /*DOC*/ "ND" ,\
+ /*Parameter*/ out_aln_format,\
+ /*Def 1*/ "score_html" ,\
+ /*Def 2*/ "" ,\
+ /*Min_value*/ "any" ,\
+ /*Max Value*/ "any" \
+ );
+
+
+
+/*PARAMETER PROTOTYPE: INFILE */
+ declare_name (color_mode);
+ get_cl_param(\
+ /*argc*/ argc ,\
+ /*argv*/ argv ,\
+ /*output*/ &le ,\
+ /*Name*/ "-color_mode" ,\
+ /*Flag*/ &garbage ,\
+ /*TYPE*/ "S" ,\
+ /*OPTIONAL?*/ OPTIONAL ,\
+ /*MAX Nval*/ 1 ,\
+ /*DOC*/ "ND" ,\
+ /*Parameter*/ &color_mode ,\
+ /*Def 1*/ "apdb" ,\
+ /*Def 2*/ "irmsd" ,\
+ /*Min_value*/ "any" ,\
+ /*Max Value*/ "any" \
+ );
+/*PARAMETER PROTOTYPE: INFILE */
+ declare_name (output_res_num);
+ get_cl_param(\
+ /*argc*/ argc ,\
+ /*argv*/ argv ,\
+ /*output*/ &le ,\
+ /*Name*/ "-seqnos" ,\
+ /*Flag*/ &garbage ,\
+ /*TYPE*/ "S" ,\
+ /*OPTIONAL?*/ OPTIONAL ,\
+ /*MAX Nval*/ 1 ,\
+ /*DOC*/ "ND" ,\
+ /*Parameter*/ &output_res_num ,\
+ /*Def 1*/ "off" ,\
+ /*Def 2*/ "on" ,\
+ /*Min_value*/ "any" ,\
+ /*Max Value*/ "any" \
+ );
+ declare_name (cache);
+ get_cl_param(\
+ /*argc*/ argc ,\
+ /*argv*/ argv ,\
+ /*output*/ &le ,\
+ /*Name*/ "-cache" ,\
+ /*Flag*/ &garbage ,\
+ /*TYPE*/ "W_F" ,\
+ /*OPTIONAL?*/ OPTIONAL ,\
+ /*MAX Nval*/ 1 ,\
+ /*DOC*/ "use,ignore,update,local, directory name" ,\
+ /*Parameter*/ &cache ,\
+ /*Def 1*/ "use" ,\
+ /*Def 2*/ "update" ,\
+ /*Min_value*/ "any" ,\
+ /*Max Value*/ "any" \
+ );
+
+ declare_name (local_mode);
+ get_cl_param( \
+ /*argc*/ argc ,\
+ /*argv*/ argv ,\
+ /*output*/ &le ,\
+ /*Name*/ "-local_mode" ,\
+ /*Flag*/ &garbage ,\
+ /*TYPE*/ "W_F" ,\
+ /*OPTIONAL?*/ OPTIONAL ,\
+ /*MAX Nval*/ 1 ,\
+ /*DOC*/ "Mode for choosing the Neighborhood (bubble or window)\nWhen selecting window, maximum distance becomes the window 1/2 size, in residues\nWhen using sphere, maximum_distance is the sphere radius in Angstrom" ,\
+ /*Parameter*/ &local_mode ,\
+ /*Def 1*/ "sphere" ,\
+ /*Def 2*/ "window" ,\
+ /*Min_value*/ "any" ,\
+ /*Max Value*/ "any" \
+ );
+
+/*PARAMETER PROTOTYPE: IN */
+ template_file_list=declare_char (100, STRING);
+ n_template_file=get_cl_param( \
+ /*argc*/ argc , \
+ /*argv*/ argv , \
+ /*output*/ &le ,\
+ /*Name*/ "-template_file" ,\
+ /*Flag*/ &garbage ,\
+ /*TYPE*/ "S" ,\
+ /*OPTIONAL?*/ OPTIONAL ,\
+ /*MAX Nval*/ 1000 ,\
+ /*DOC*/ "List of templates file for the sequences",\
+ /*Parameter*/ template_file_list , \
+ /*Def 1*/ "_SELF_P_",\
+ /*Def 2*/ "stdin" ,\
+ /*Min_value*/ "any" ,\
+ /*Max Value*/ "any" \
+ );
+ /*PARAMETER PROTOTYPE: MODE */
+ declare_name (mode);
+ get_cl_param( \
+ /*argc*/ argc , \
+ /*argv*/ argv , \
+ /*output*/ &le ,\
+ /*Name*/ "-mode" ,\
+ /*Flag*/ &garbage ,\
+ /*TYPE*/ "S" ,\
+ /*OPTIONAL?*/ OPTIONAL ,\
+ /*MAX Nval*/ 1 ,\
+ /*DOC*/ "Mode: irmsd, ",\
+ /*Parameter*/ &mode , \
+ /*Def 1*/ "irmsd",\
+ /*Def 2*/ "stdin" ,\
+ /*Min_value*/ "any" ,\
+ /*Max Value*/ "any" \
+ );
+
+
+
+ get_cl_param(\
+ /*argc*/ argc ,\
+ /*argv*/ argv ,\
+ /*output*/ &le ,\
+ /*Name*/ "-prot_min_sim" ,\
+ /*Flag*/ &prot_min_sim ,\
+ /*TYPE*/ "D" ,\
+ /*OPTIONAL?*/ OPTIONAL ,\
+ /*MAX Nval*/ 1 ,\
+ /*DOC*/ "Minimum similarity between a sequence and its PDB target" ,\
+ /*Parameter*/ &prot_min_sim ,\
+ /*Def 1*/ "0" ,\
+ /*Def 2*/ "20" ,\
+ /*Min_value*/ "any" ,\
+ /*Max Value*/ "any" \
+ );
+ set_int_variable ("prot_min_sim", prot_min_sim);
+
+get_cl_param(\
+ /*argc*/ argc ,\
+ /*argv*/ argv ,\
+ /*output*/ &le ,\
+ /*Name*/ "-prot_max_sim" ,\
+ /*Flag*/ &prot_max_sim ,\
+ /*TYPE*/ "D" ,\
+ /*OPTIONAL?*/ OPTIONAL ,\
+ /*MAX Nval*/ 1 ,\
+ /*DOC*/ "Maximum similarity between a sequence and its BLAST relatives" ,\
+ /*Parameter*/ &prot_max_sim ,\
+ /*Def 1*/ "90" ,\
+ /*Def 2*/ "100" ,\
+ /*Min_value*/ "any" ,\
+ /*Max Value*/ "any" \
+ );
+ set_int_variable ("prot_max_sim", prot_max_sim);
+
+get_cl_param(\
+ /*argc*/ argc ,\
+ /*argv*/ argv ,\
+ /*output*/ &le ,\
+ /*Name*/ "-prot_min_cov" ,\
+ /*Flag*/ &prot_min_cov ,\
+ /*TYPE*/ "D" ,\
+ /*OPTIONAL?*/ OPTIONAL ,\
+ /*MAX Nval*/ 1 ,\
+ /*DOC*/ "Minimum coverage of a sequence by its BLAST relatives" ,\
+ /*Parameter*/ &prot_min_cov ,\
+ /*Def 1*/ "0" ,\
+ /*Def 2*/ "0" ,\
+ /*Min_value*/ "any" ,\
+ /*Max Value*/ "any" \
+ );
+set_int_variable ("prot_min_cov", prot_min_cov);
+
+get_cl_param(\
+ /*argc*/ argc ,\
+ /*argv*/ argv ,\
+ /*output*/ &le ,\
+ /*Name*/ "-pdb_min_sim" ,\
+ /*Flag*/ &pdb_min_sim ,\
+ /*TYPE*/ "D" ,\
+ /*OPTIONAL?*/ OPTIONAL ,\
+ /*MAX Nval*/ 1 ,\
+ /*DOC*/ "Minimum similarity between a sequence and its PDB target" ,\
+ /*Parameter*/ &pdb_min_sim ,\
+ /*Def 1*/ "35" ,\
+ /*Def 2*/ "35" ,\
+ /*Min_value*/ "any" ,\
+ /*Max Value*/ "any" \
+ );
+
+ set_int_variable ("pdb_min_sim", pdb_min_sim);
+ get_cl_param( \
+ /*argc*/ argc ,\
+ /*argv*/ argv ,\
+ /*output*/ &le ,\
+ /*Name*/ "-pdb_max_sim" ,\
+ /*Flag*/ &pdb_max_sim ,\
+ /*TYPE*/ "D" ,\
+ /*OPTIONAL?*/ OPTIONAL ,\
+ /*MAX Nval*/ 1 ,\
+ /*DOC*/ "Maximum similarity between a sequence and its PDB target" ,\
+ /*Parameter*/ &pdb_max_sim ,\
+ /*Def 1*/ "100" ,\
+ /*Def 2*/ "0" ,\
+ /*Min_value*/ "any" ,\
+ /*Max Value*/ "any" \
+ );
+ set_int_variable ("pdb_max_sim", pdb_max_sim);
+ get_cl_param( \
+ /*argc*/ argc ,\
+ /*argv*/ argv ,\
+ /*output*/ &le ,\
+ /*Name*/ "-pdb_min_cov" ,\
+ /*Flag*/ &pdb_min_cov ,\
+ /*TYPE*/ "D" ,\
+ /*OPTIONAL?*/ OPTIONAL ,\
+ /*MAX Nval*/ 1 ,\
+ /*DOC*/ "Minimum coverage of a sequence by its PDB target" ,\
+ /*Parameter*/ &pdb_min_cov ,\
+ /*Def 1*/ "50" ,\
+ /*Def 2*/ "25" ,\
+ /*Min_value*/ "any" ,\
+ /*Max Value*/ "any" \
+ );
+set_int_variable ("pdb_min_cov", pdb_min_cov);
+
+
+
+declare_name (pdb_blast_server);
+ get_cl_param(\
+ /*argc*/ argc ,\
+ /*argv*/ argv ,\
+ /*output*/ &le ,\
+ /*Name*/ "-pdb_blast_server" ,\
+ /*Flag*/ &garbage ,\
+ /*TYPE*/ "W_F" ,\
+ /*OPTIONAL?*/ OPTIONAL ,\
+ /*MAX Nval*/ 1 ,\
+ /*DOC*/ "ND" ,\
+ /*Parameter*/&pdb_blast_server ,\
+ /*Def 1*/ "EBI" ,\
+ /*Def 2*/ "default" ,\
+ /*Min_value*/ "any" ,\
+ /*Max Value*/ "any" \
+ );
+declare_name (prot_blast_server);
+ get_cl_param(\
+ /*argc*/ argc ,\
+ /*argv*/ argv ,\
+ /*output*/ &le ,\
+ /*Name*/ "-blast" ,\
+ /*Flag*/ &garbage ,\
+ /*TYPE*/ "W_F" ,\
+ /*OPTIONAL?*/ OPTIONAL ,\
+ /*MAX Nval*/ 1 ,\
+ /*DOC*/ "ND" ,\
+ /*Parameter*/&prot_blast_server ,\
+ /*Def 1*/ "" ,\
+ /*Def 2*/ "" ,\
+ /*Min_value*/ "any" ,\
+ /*Max Value*/ "any" \
+ );
+ //make sure that -blast and -blast_server are both supported blast>blast_server
+ if ( !prot_blast_server[0])
+ {
+ get_cl_param( \
+ /*argc*/ argc ,\
+ /*argv*/ argv ,\
+ /*output*/ &le ,\
+ /*Name*/ "-blast_server" ,\
+ /*Flag*/ &garbage ,\
+ /*TYPE*/ "W_F" ,\
+ /*OPTIONAL?*/ OPTIONAL ,\
+ /*MAX Nval*/ 1 ,\
+ /*DOC*/ "ND" ,\
+ /*Parameter*/&prot_blast_server ,\
+ /*Def 1*/ "EBI" ,\
+ /*Def 2*/ "default" ,\
+ /*Min_value*/ "any" ,\
+ /*Max Value*/ "any" \
+ );
+ }
+ set_string_variable ("blast_server", prot_blast_server);
+
+
+
+ declare_name (pdb_db);
+ get_cl_param(\
+ /*argc*/ argc ,\
+ /*argv*/ argv ,\
+ /*output*/ &le ,\
+ /*Name*/ "-pdb_db" ,\
+ /*Flag*/ &garbage ,\
+ /*TYPE*/ "W_F" ,\
+ /*OPTIONAL?*/ OPTIONAL ,\
+ /*MAX Nval*/ 1 ,\
+ /*DOC*/ "Non Redundant PDB database" ,\
+ /*Parameter*/&pdb_db ,\
+ /*Def 1*/ "pdb" ,\
+ /*Def 2*/ "default" ,\
+ /*Min_value*/ "any" ,\
+ /*Max Value*/ "any" \
+ );
+ set_string_variable ("pdb_db", pdb_db);
+
+
+declare_name (prot_db);
+ get_cl_param(\
+ /*argc*/ argc ,\
+ /*argv*/ argv ,\
+ /*output*/ &le ,\
+ /*Name*/ "-protein_db" ,\
+ /*Flag*/ &garbage ,\
+ /*TYPE*/ "W_F" ,\
+ /*OPTIONAL?*/ OPTIONAL ,\
+ /*MAX Nval*/ 1 ,\
+ /*DOC*/ "ND" ,\
+ /*Parameter*/&prot_db ,\
+ /*Def 1*/ "uniprot" ,\
+ /*Def 2*/ "default" ,\
+ /*Min_value*/ "any" ,\
+ /*Max Value*/ "any" \
+ );
+
+ get_cl_param( \
+ /*argc*/ argc ,\
+ /*argv*/ argv ,\
+ /*output*/ &le ,\
+ /*Name*/ "-gapped" ,\
+ /*Flag*/ &garbage ,\
+ /*TYPE*/ "D" ,\
+ /*OPTIONAL?*/ OPTIONAL ,\
+ /*MAX Nval*/ 1 ,\
+ /*DOC*/ "ND" ,\
+ /*Parameter*/&gapped ,\
+ /*Def 1*/ "0" ,\
+ /*Def 2*/ "1" ,\
+ /*Min_value*/ "any" ,\
+ /*Max Value*/ "any" \
+ );
+ get_cl_param( \
+ /*argc*/ argc ,\
+ /*argv*/ argv ,\
+ /*output*/ &le ,\
+ /*Name*/ "-min_ncol" ,\
+ /*Flag*/ &garbage ,\
+ /*TYPE*/ "D" ,\
+ /*OPTIONAL?*/ OPTIONAL ,\
+ /*MAX Nval*/ 1 ,\
+ /*DOC*/ "minimum number of columns (negative: fraction)" ,\
+ /*Parameter*/&min_ncol ,\
+ /*Def 1*/ "4" ,\
+ /*Def 2*/ "1" ,\
+ /*Min_value*/ "any" ,\
+ /*Max Value*/ "any" \
+ );
+ // set the correct mode:
+ if ( strm (argv[0], "trmsd"))sprintf (mode, "trmsd");
+
+ set_string_variable ("prot_db", prot_db);
+
+
+ if (argc==1){myexit (EXIT_SUCCESS);}
+
+ if ( strm (outfile,"no"))n_out_aln_format=0;
+
+ get_cl_param( argc, argv,&le, NULL,NULL,NULL,0,0,NULL);
+ prepare_cache (cache);
+
+
+ if (strm ( aln, ""))
+ sprintf ( aln, "%s", argv[1]);
+
+ if (!is_aln (aln))
+ {
+ printf_exit (EXIT_FAILURE, stderr, "\n\n---- ERROR: File %s must be a valid alignment [FATAL:%s-%s]\n\n",aln,argv[0], PROGRAM);
+ }
+
+ pdb_param=vcalloc ( 1, sizeof(Pdb_param));
+
+ pdb_param->similarity_threshold=similarity_threshold;
+
+ pdb_param->md_threshold=md_threshold;
+ pdb_param->maximum_distance=maximum_distance;
+
+ if ( n_excluded_nb>0)
+ pdb_param->n_excluded_nb=n_excluded_nb;
+ else if ( n_excluded_nb==-1)
+ pdb_param->n_excluded_nb=(int)((float)maximum_distance/(float)1.57);
+ /* Exclude all the nb within the bubble at +1, +2, +n*/
+ pdb_param->print_rapdb=print_rapdb;
+ pdb_param->comparison_io=comparison_io;
+
+ pdb_param->local_mode=local_mode;
+ pdb_param->color_mode=lower_string (color_mode);
+ pdb_param->filter=filter;
+ pdb_param->filter_aln=filter_aln;
+ pdb_param->irmsd_graph=irmsd_graph;
+ pdb_param->nirmsd_graph=nirmsd_graph;
+
+ sprintf ( list_file[n_list++], "S%s", aln);
+
+
+ if (!strm (repeat_seq, ""))
+ {
+
+ sprintf ( template_file_list[0], "%s", process_repeat (list_file[0], repeat_seq, repeat_pdb));
+ fprintf ( le, "\n##Turn a repeat List into a Template File\n");
+ le=display_file_content (le,template_file_list[0]);
+ fprintf ( le, "\n\n");
+ }
+ S=read_seq_in_n_list (list_file, n_list, NULL, NULL);
+
+ le=display_sequences_names ( S,le,0, 0);
+
+ if ( n_template_file)
+ {
+ fprintf ( le, "\nLooking For Sequence Templates:\n");
+ for ( a=0; a< n_template_file; a++)
+ {
+ fprintf ( le, "\n\tTemplate Type: [%s] Mode Or File: [%s] [Start", template_type2type_name(template_file_list[a]), template_file_list[a]);
+ S=seq2template_seq(S, template_file_list[a], F);
+ fprintf ( le, "]");
+ }
+ }
+
+ if ( !strm (run_name, "default"))
+ {
+ F=parse_fname(run_name);
+ sprintf (F->name, "%s", F->full);
+ }
+ else
+ {
+ F=parse_fname (aln);
+ }
+
+ for ( a=0; a< S->nseq; a++)
+ {
+ char *p;
+
+ p=seq2T_value (S, a, "template_file", "_P_");
+
+ if (p)sprintf (S->file[a], "%s",p);
+ }
+
+ CL=declare_constraint_list ( S,NULL, NULL, 0,NULL, NULL);
+ CL->T=vcalloc (S->nseq,sizeof (Ca_trace*));
+
+
+ for ( n_pdb=0,a=0; a<S->nseq; a++)
+ {
+ if ( !is_pdb_file ( S->file[a])){CL->T[a]=NULL;continue;}
+ CL->T[a]=read_ca_trace (S->file[a], "ATOM");
+ CL->T[a]=trim_ca_trace (CL->T[a], S->seq[a]);
+ (CL->T[a])->pdb_param=pdb_param;
+ n_pdb++;
+ }
+
+ A=declare_aln (S);
+
+
+ A->residue_case=KEEP_CASE;
+ A=main_read_aln(aln, A);
+ EA=copy_aln (A, EA);
+ A->CL=CL;
+
+ if ( strm (apdb_outfile, "default"))
+ sprintf ( apdb_outfile, "%s.apdb_result", F->name);
+
+ if ( n_pdb<2)
+ {
+ FILE *fp;
+ fp=vfopen (apdb_outfile, "w");
+ fprintf (fp, "\nYour Alignment Does Not Contain Enough Sequences With a known Structure\n");
+ fprintf (fp, "To Use APDB, your alignment must include at least TWO sequences with a known structure.\n");
+ fprintf (fp, "These sequences must be named according to their PDB identifier, followed by the chain index (if any) ex: 1fnkA\n");
+ fprintf (fp, "[FATAL:%s]\n", PROGRAM);
+ vfclose (fp);
+ }
+ else if ( strm (mode, "irmsd"))
+ {
+ EA=analyse_pdb ( A, EA, apdb_outfile);
+ }
+ else if ( strm (mode, "msa2tree") || strm (mode, "trmsd"))
+ {
+ EA=msa2struc_dist ( A, EA,F->name, gapped, min_ncol);
+ }
+ le=display_output_filename ( le, "APDB_RESULT", "APDB_RESULT_FORMAT_01", apdb_outfile, CHECK);
+
+ if ( n_pdb>=2)
+ {
+ declare_name (file_name);
+ for ( a=0; a< n_out_aln_format; a++)
+ {
+ if ( strm2( outfile, "stdout", "stderr"))sprintf (file_name, "%s", outfile);
+ else if ( strm (outfile, "default"))
+ sprintf (file_name, "%s.%s",F->name, out_aln_format[a]);
+ else
+ sprintf (file_name, "%s.%s",outfile,out_aln_format[a]);
+
+ output_format_aln (out_aln_format[a],A,EA,file_name);
+ le=display_output_filename ( le, "MSA", out_aln_format[a], file_name, CHECK);
+ }
+ }
+ return EXIT_SUCCESS;
+ }
+
+
+
+Constraint_list * set_constraint_list4align_pdb (Constraint_list *CL,int seq, char *dp_mode, char *local_mode, char *param_file)
+{
+ static Constraint_list *PWCL;
+ static Pdb_param *pdb_param;
+ char **x;
+ int n;
+
+ if ( !CL)
+ {
+ free_constraint_list (PWCL);
+ return NULL;
+ }
+ else if ( !PWCL)
+ {
+ PWCL=declare_constraint_list ( CL->S,NULL, NULL, 0,NULL, NULL);
+
+ pdb_param=vcalloc ( 1, sizeof(Pdb_param));
+ pdb_param->N_ca=0;
+ pdb_param->max_delta=2.0;
+ pdb_param->maximum_distance=14;
+ declare_name (pdb_param->local_mode);
+ sprintf (pdb_param->local_mode, "%s", local_mode);
+ pdb_param->scale=50;
+
+ PWCL->pw_parameters_set=1;
+ PWCL->S=CL->S;
+ PWCL->lalign_n_top=10;
+ PWCL->sw_min_dist=10;
+
+ PWCL->T=vcalloc ( (PWCL->S)->nseq, sizeof (Ca_trace*));
+
+ PWCL->extend_jit=0;
+ PWCL->maximise=1;
+ /*PWCL->gop=-40;*/
+ PWCL->gop=-50;
+ PWCL->gep=-20;
+ sprintf (CL->matrix_for_aa_group, "vasiliky");
+ PWCL->use_fragments=0;
+ PWCL->ktup=0;
+ PWCL->TG_MODE=1;
+ }
+
+
+ if ( param_file && check_file_exists ( param_file) )
+ {
+ if ( (x=get_parameter ( "-nca", &n, param_file))!=NULL){pdb_param->N_ca=atoi(x[0]);free_char (x, -1);}
+ if ( (x=get_parameter ( "-max_delta", &n, param_file))!=NULL){pdb_param->max_delta=atof(x[0]);free_char (x, -1);}
+ if ( (x=get_parameter ( "-maximum_distance", &n, param_file))!=NULL){pdb_param->maximum_distance=atoi(x[0]);free_char (x, -1);}
+ if ( (x=get_parameter ( "-local_mode", &n, param_file))!=NULL){sprintf (pdb_param->local_mode, "%s",x[0]);free_char (x, -1);}
+ if ( (x=get_parameter ( "-scale", &n, param_file))!=NULL){pdb_param->scale=atoi(x[0]);free_char (x, -1);}
+ if ( (x=get_parameter ( "-gapopen", &n, param_file))!=NULL){PWCL->gop=atoi(x[0]);free_char (x, -1);}
+ if ( (x=get_parameter ( "-gapext" , &n, param_file))!=NULL){PWCL->gep=atof(x[0]);free_char (x, -1);}
+
+ }
+
+
+
+
+ sprintf ( PWCL->dp_mode, "%s", dp_mode);
+
+ if (strm (PWCL->dp_mode, "lalign"))sprintf (PWCL->dp_mode,"sim_pair_wise_lalign");
+ else if (strm (PWCL->dp_mode, "sw"))sprintf (PWCL->dp_mode,"gotoh_pair_wise_sw");
+
+ local_mode=pdb_param->local_mode;
+ if ( strm ( local_mode, "hasch_ca_trace_nb")) PWCL->evaluate_residue_pair=evaluate_ca_trace_nb;
+ else if ( strm ( local_mode, "hasch_ca_trace_bubble")) PWCL->evaluate_residue_pair=evaluate_ca_trace_bubble;
+ else if ( strm ( local_mode, "hasch_ca_trace_sap1_bubble")) PWCL->evaluate_residue_pair=evaluate_ca_trace_sap1_bubble;
+ else if ( strm ( local_mode, "hasch_ca_trace_sap2_bubble")) PWCL->evaluate_residue_pair=evaluate_ca_trace_sap2_bubble;
+
+ else if ( strm ( local_mode, "hasch_ca_trace_transversal")) PWCL->evaluate_residue_pair=evaluate_ca_trace_transversal;
+ else if ( strm ( local_mode, "hasch_ca_trace_bubble_2")) PWCL->evaluate_residue_pair=evaluate_ca_trace_bubble_2;
+ else if ( strm ( local_mode, "hasch_ca_trace_bubble_3")) PWCL->evaluate_residue_pair=evaluate_ca_trace_bubble_3;
+ else if ( strm ( local_mode, "custom_pair_score_function1")) PWCL->evaluate_residue_pair=custom_pair_score_function1;
+ else if ( strm ( local_mode, "custom_pair_score_function2")) PWCL->evaluate_residue_pair=custom_pair_score_function2;
+ else if ( strm ( local_mode, "custom_pair_score_function3")) PWCL->evaluate_residue_pair=custom_pair_score_function3;
+ else if ( strm ( local_mode, "custom_pair_score_function4")) PWCL->evaluate_residue_pair=custom_pair_score_function4;
+ else if ( strm ( local_mode, "custom_pair_score_function5")) PWCL->evaluate_residue_pair=custom_pair_score_function5;
+ else if ( strm ( local_mode, "custom_pair_score_function6")) PWCL->evaluate_residue_pair=custom_pair_score_function6;
+ else if ( strm ( local_mode, "custom_pair_score_function7")) PWCL->evaluate_residue_pair=custom_pair_score_function7;
+ else if ( strm ( local_mode, "custom_pair_score_function8")) PWCL->evaluate_residue_pair=custom_pair_score_function8;
+ else if ( strm ( local_mode, "custom_pair_score_function9")) PWCL->evaluate_residue_pair=custom_pair_score_function9;
+ else if ( strm ( local_mode, "custom_pair_score_function10")) PWCL->evaluate_residue_pair=custom_pair_score_function10;
+
+
+ else
+ {
+ fprintf ( stderr, "\n%s is an unknown hasch mode, [FATAL]\n", local_mode);
+ myexit (EXIT_FAILURE);
+ }
+
+ if ( PWCL->T[seq]);
+ else
+ {
+ PWCL->T[seq]=read_ca_trace (is_pdb_struc((CL->S)->name[seq]), "ATOM");
+ (PWCL->T[seq])->pdb_param=pdb_param;
+ PWCL->T[seq]=trim_ca_trace (PWCL->T[seq], (CL->S)->seq[seq]);
+ PWCL->T[seq]=hasch_ca_trace(PWCL->T[seq]);
+
+ }
+
+
+ return PWCL;
+}
+
+
+
+int evaluate_ca_trace_nb (Constraint_list *CL, int s1, int r1, int s2, int r2)
+ {
+
+ return (int)(neighborhood_match(CL, s1,r1, s2, r2, (CL->T[s1])->Chain,(CL->T[s2])->Chain ));
+ }
+int evaluate_ca_trace_sap2_bubble (Constraint_list *CL, int s1, int r1, int s2, int r2)
+ {
+
+
+
+ return sap2_neighborhood_match (CL, s1, r1, s2, r2, (CL->T[s1])->Bubble,(CL->T[s2])->Bubble );
+
+ }
+int evaluate_ca_trace_sap1_bubble (Constraint_list *CL, int s1, int r1, int s2, int r2)
+ {
+ /*
+ Function documentation: start
+
+ int evaluate_ca_trace_sap1_bubble (Constraint_list *CL, int s1, int s2, int r1, int r2)
+ This function evaluates the cost for matching two residues:
+
+ a1 is the cost for matching the two neighborood ( bubble type), using sap
+ a1: [0,+100], +100 is the best possible match.
+ a2 is the residue type weight:
+ min=worst substitution value
+ best=best of r1/r1, r2/r2-min
+
+ a2=(r1/r2 -min)/best --> a1:[0, 100]
+
+ score=a1*a2-->[-inf, +10000];
+ */
+
+
+
+ float a1;
+
+
+ a1=(int) sap1_neighborhood_match (CL, s1, r1, s2, r2, (CL->T[s1])->Bubble,(CL->T[s2])->Bubble );
+
+ return (int)a1;
+
+
+ }
+int evaluate_ca_trace_bubble (Constraint_list *CL, int s1, int r1, int s2, int r2)
+ {
+ /*
+ Function documentation: start
+
+ int evaluate_ca_trace_bubble (Constraint_list *CL, int s1, int s2, int r1, int r2)
+ This function evaluates the cost for matching two residues:
+
+ a1 is the cost for matching the two neighborood ( bubble type)
+ a1: [-inf,+100-scale], +100-scale is the best possible match.
+
+ */
+
+
+
+ float a1;
+
+
+
+ a1=(int) neighborhood_match (CL, s1, r1, s2, r2, (CL->T[s1])->Bubble,(CL->T[s2])->Bubble )-((CL->T[s1])->pdb_param)->scale;
+
+ return a1;
+
+
+ }
+int evaluate_ca_trace_transversal (Constraint_list *CL, int s1, int r1, int s2, int r2)
+ {
+ return (int)(transversal_match (CL, s1, r1, s2, r2, (CL->T[s1])->Transversal,(CL->T[s2])->Transversal ));
+ }
+
+int evaluate_ca_trace_bubble_3 (Constraint_list *CL, int s1, int r1, int s2, int r2)
+ {
+ /*This Mode evaluates :
+
+ 1-The Bubble
+ 2-The Match of the transversal residues
+ */
+
+ int a1, l1;
+ int a2, l2;
+ int a;
+
+ l1=MAX(( (CL->T[s1])->Chain )->nb[r1][0] ,((CL->T[s2])->Chain )->nb[r2][0]);
+ l2=MAX(( (CL->T[s1])->Bubble)->nb[r1][0], ((CL->T[s2])->Bubble)->nb[r2][0]);
+
+ a1=(int)(neighborhood_match (CL, s1, r1, s2, r2, (CL->T[s1])->Bubble,(CL->T[s2])->Bubble ));
+ a2=(int)(transversal_match (CL, s1, r1, s2, r2, (CL->T[s1])->Transversal,(CL->T[s2])->Transversal ));
+
+ if ( !l1 && !l2)return 0;
+ a=(a1+a2)/2;
+ return a;
+ }
+int evaluate_ca_trace_bubble_2 (Constraint_list *CL, int s1, int r1, int s2, int r2)
+ {
+ /*This Mode evaluates :
+ 1-The Ca neighborhood
+ 2-The Bubble
+ */
+
+
+ return (int)((neighborhood_match (CL, s1, r1, s2, r2, (CL->T[s1])->Chain,(CL->T[s2])->Chain )));
+ }
+
+
+/*********************************************************************************************/
+/* */
+/* FUNCTIONS FOR COMPARING TWO NEIGHBORHOODS:START */
+/* */
+/*********************************************************************************************/
+float matrix_match (Constraint_list *CL, int s1, int r1, int s2, int r2, Struct_nb *nbs1, Struct_nb *nbs2)
+
+ {
+ /*
+ Function documentation: start
+
+ float matrix_match (Constraint_list *CL, int s1, int s2, int r1, int r2, Struct_nb *nbs1, Struct_nb *nbs2)
+ This function evaluates the matrix for matching two residues:
+
+ min=worst substitution value
+ best=best of r1/r1, r2/r2-min
+
+ a2=(r1/r2 -min)/best --> a1:[0, 100]
+
+ score=a1*a2-->[-inf, +10000];
+ */
+
+
+
+ float a2;
+ float m1, m2, m;
+ static float min=0;
+ int a, b;
+
+ if ( !CL->M)
+ {
+ CL->M=read_matrice ( "pam250mt");
+ min=CL->M[0][0];
+ for ( a=0; a< 26; a++)
+ for ( b=0; b< 26; b++)min=MIN(min, CL->M[a][b]);
+ }
+
+ if ( r1<=0 || r2<=0)return 0;
+ m1=CL->M[(CL->S)->seq[s1][r1-1]-'A'][(CL->S)->seq[s1][r1-1]-'A']-min;
+ m2=CL->M[(CL->S)->seq[s2][r2-1]-'A'][(CL->S)->seq[s2][r2-1]-'A']-min;
+ m=MAX(m1, m2);
+ a2=(CL->M[(CL->S)->seq[s1][r1-1]-'A'][(CL->S)->seq[s2][r2-1]-'A']-min)/m;
+
+ return a2;
+ }
+
+
+float transversal_match (Constraint_list *CL, int s1, int r1, int s2, int r2, Struct_nb *nbs1, Struct_nb *nbs2)
+ {
+ int a, l1, l2;
+ float score=0;
+ float delta, max_delta;
+ float max;
+ Pdb_param*PP;
+
+ PP=(CL->T[s1])->pdb_param;
+ max_delta=PP->max_delta;
+
+ l1=nbs1->nb[r1][0];
+ l2=nbs2->nb[r2][0];
+
+ if ( l1!=l2 || l1<(PP->N_ca)) return 0;
+
+
+ max=MAX(l1, l2)*max_delta;
+ for ( delta=0,a=0; a< l2 ; a++)
+ {
+
+ delta+=max_delta-FABS((nbs1->d_nb[r1][a]-nbs2->d_nb[r2][a]));
+ }
+ score=(delta*100)/max;
+
+
+
+ return score;
+ }
+
+float neighborhood_match (Constraint_list *CL, int s1, int r1, int s2, int r2, Struct_nb *nbs1, Struct_nb *nbs2)
+ {
+ static float **table;
+ static int table_size;
+ int a, b, l1, l2;
+ float score=0;
+ float ins, del, sub;
+ float delta, max_delta;
+ float max;
+ Pdb_param*PP;
+
+
+ PP=(CL->T[s1])->pdb_param;
+ max_delta=PP->max_delta;
+
+
+ if ( r1> 0 && r2 >0) {r1--; r2--;}
+ else return 0;
+
+ l1=nbs1->nb[r1][0];
+ l2=nbs2->nb[r2][0];
+
+ if (table_size< (MAX(l1, l2)+1))
+ {
+ table_size=MAX(l1, l2)+1;
+ if ( table)free_float (table, -1);
+ table=NULL;
+ }
+ if ( !table) table=declare_float (table_size, table_size);
+
+
+ max=MAX(l1, l2)*max_delta;
+ if ( max==0)return 0;
+
+
+ table[0][0]=0;
+ for ( b=1; b<=l2; b++)
+ {
+ table[0][b]=0;
+ }
+ for ( a=1; a<=l1; a++)
+ {
+ table[a][0]=0;
+ for ( b=1; b<=l2 ; b++)
+ {
+
+ delta=max_delta-FABS((nbs1->d_nb[r1][a]-nbs2->d_nb[r2][b]));
+
+ del=table[a-1][b];
+ ins=table[a][b-1];
+ sub= table[a-1][b-1]+delta;
+
+ if ( del >= ins && del >= sub)score=del;
+ else if ( ins >= del && ins >= sub) score=ins;
+ else score=sub;
+ table[a][b]=score;
+ }
+ }
+
+
+ score=((((score)*100)/max));
+
+
+ return score;
+ }
+
+float sap1_neighborhood_match (Constraint_list *CL, int s1, int r1, int s2, int r2, Struct_nb *nbs1, Struct_nb *nbs2)
+ {
+ /*
+ Function documentation: start
+
+ float sap1_neighborhood_match (Constraint_list *CL, int s1, int s2, int r1, int r2, Struct_nb *nbs1, Struct_nb *nbs2)
+ This function is adapted from Taylor, Orengo, Protein Structure Alignment JMB 1989, (208)1-22
+ It is the first function where
+ score= A/(|dra-drb|+b)
+
+ Function documentation: end
+ */
+
+ static float **table;
+ static int table_size;
+ int a, b, l1, l2;
+ float score=0;
+ float ins, del, sub;
+ float delta;
+ float max;
+
+ int A=50;
+ int B=5;
+
+
+
+
+
+
+ if ( r1> 0 && r2 >0) {r1--; r2--;}
+ else return 0;
+
+ l1=nbs1->nb[r1][0];
+ l2=nbs2->nb[r2][0];
+
+ if (table_size< (MAX(l1, l2)+1))
+ {
+ table_size=MAX(l1, l2)+1;
+ if ( table)free_float (table, -1);
+ table=NULL;
+ }
+ if ( !table) table=declare_float (table_size, table_size);
+
+
+ max=MAX(l1, l2)*(A/B);
+ if ( max==0)return 0;
+
+
+ table[0][0]=0;
+ for ( b=1; b<=l2; b++)
+ {
+ table[0][b]=0;
+ }
+ for ( a=1; a<=l1; a++)
+ {
+ table[a][0]=0;
+ for ( b=1; b<=l2 ; b++)
+ {
+
+ delta=A/(FABS((nbs1->d_nb[r1][a]-nbs2->d_nb[r2][b]))+B);
+
+ del=table[a-1][b];
+ ins=table[a][b-1];
+ sub= table[a-1][b-1]+delta;
+ if ( del >= ins && del >= sub)score=del;
+ else if ( ins >= del && ins >= sub) score=ins;
+ else score=sub;
+ table[a][b]=score;
+ }
+ }
+
+
+ score=((score*100))/(max);
+
+
+ return score;
+ }
+
+float sap2_neighborhood_match (Constraint_list *CL, int s1, int r1, int s2, int r2, Struct_nb *nbs1, Struct_nb *nbs2)
+ {
+ /*
+ Function documentation: start
+
+ float sap1_neighborhood_match (Constraint_list *CL, int s1, int s2, int r1, int r2, Struct_nb *nbs1, Struct_nb *nbs2)
+ This function is adapted from Taylor, Orengo, Protein Structure Alignment JMB 1989, (208)1-22
+ It is the first function where
+ score= A/(|dra-drb|+b)
+
+ Function documentation: end
+ */
+
+ static float **table;
+ static int table_size;
+ int a, b, l1, l2;
+ float score=0;
+ float ins, del, sub;
+ float delta;
+ float max;
+
+ Amino_acid **pep1;
+ Amino_acid **pep2;
+ static Atom *vX_1, *vY_1, *vZ_1;
+ static Atom *vX_2, *vY_2, *vZ_2;
+ static Atom *ca1, *ca2;
+ float val;
+
+ int A=50;
+ int B=2;
+
+
+
+
+ if ( r1> 0 && r2 >0) {r1--; r2--;}
+ else return 0;
+
+ /*Make up the referencial*/
+ pep1=(CL->T[s1])->peptide_chain;
+ pep2=(CL->T[s2])->peptide_chain;
+
+ /*Get Referencial for CA1*/
+ if ( (pep1[r1])->C)vX_1 =diff_atom(pep1[r1]->C,pep1[r1]->CA, vX_1);
+ if ( (pep1[r1])->N)vY_1 =diff_atom(pep1[r1]->N,pep1[r1]->CA, vY_1);
+ if ( (pep1[r1])->CB)vZ_1=diff_atom(pep1[r1]->CB,(pep1[r1])->CA,vZ_1);
+ else vZ_1=add_atom (vX_1, vY_1, vZ_1);
+
+
+
+
+
+ /*Get Referencial for CA2*/
+ if ( (pep2[r2])->C)vX_2 =diff_atom((pep2[r2])->C,(pep2[r2])->CA, vX_2);
+ if ( (pep2[r2])->N)vY_2 =diff_atom((pep2[r2])->N,(pep2[r2])->CA, vY_2);
+ if ( (pep2[r2])->CB)vZ_2=diff_atom((pep2[r2])->CB,(pep2[r2])->CA, vZ_2);
+ else vZ_2=add_atom (vX_2, vY_2, vZ_2);
+
+
+
+
+ /*END OF GETTING REFERENCIAL*/
+
+ /*Test
+ if ( r1>1 && r2>1)
+ {
+ fprintf (stdout,"\n\t*******");
+
+ fprintf (stdout, "RESIDUE %d %c", r1, (CL->S)->seq[s1][r1]);
+ if ( (pep1[r1])->CA)fprintf (stdout,"\n\tCA ");print_atom (pep1[r1]->CA );
+ if ( (pep1[r1])->C)fprintf (stdout,"\n\tC ");print_atom (pep1[r1]->C );
+ if ( (pep1[r1])->N)fprintf (stdout,"\n\tN ");print_atom (pep1[r1]->N );
+ if ( (pep1[r1])->CB)fprintf (stdout,"\n\tCB ");print_atom (pep1[r1]->CB );
+ fprintf (stdout,"\n\t*******");
+ fprintf (stdout,"\n\tvX ");print_atom ( vX_1);
+ fprintf (stdout,"\n\tvY ");print_atom ( vY_1);
+ fprintf (stdout,"\n\tvZ ");print_atom ( vZ_1);
+
+ ca1= copy_atom ((pep1[r1-1])->CA, ca1);
+ ca1 =diff_atom(ca1,(pep1[r1])->CA, ca1);
+ fprintf (stdout,"\n\tca ");print_atom ( ca1);
+ fprintf ( stdout, "\n\tSQ1=%d ", (int)square_atom(ca1));
+ ca1=reframe_atom(vX_1, vY_1, vZ_1, ca1, ca1);
+ fprintf ( stdout, "\n\tSQ2=%d ", (int)square_atom(ca1));
+ fprintf (stdout,"\n\tca ");print_atom ( ca1);
+ fprintf (stdout,"\n\n");
+ }
+ */
+
+ l1=nbs1->nb[r1][0];
+ l2=nbs2->nb[r2][0];
+
+ if (table_size< (MAX(l1, l2)+1))
+ {
+ table_size=MAX(l1, l2)+1;
+ if ( table)free_float (table, -1);
+ table=NULL;
+ }
+ if ( !table) table=declare_float (table_size, table_size);
+
+
+ max=MAX(l1, l2)*(A/B);
+
+ if ( max==0)return 0;
+
+
+ table[0][0]=0;
+ for ( b=1; b<=l2; b++)
+ {
+ table[0][b]=0;
+ }
+
+ for ( a=1; a<=l1; a++)
+ {
+ ca1=copy_atom ((CL->T[s1])->structure[nbs1->nb[r1][a]], ca1);
+ ca1=diff_atom(ca1,(pep1[r1])->CA, ca1);
+ ca1=reframe_atom(vX_1, vY_1, vZ_1, ca1, ca1);
+
+ table[a][0]=0;
+ for ( b=1; b<=l2 ; b++)
+ {
+ ca2 =copy_atom((CL->T[s2])->structure[nbs2->nb[r2][b]], ca2);
+ ca2 =diff_atom(ca2,(pep2[r2])->CA, ca2);
+ ca2 =reframe_atom(vX_2, vY_2, vZ_2, ca2, ca2);
+
+ ca2=diff_atom(ca2,ca1,ca2);
+ val=square_atom (ca2);
+
+ val=(float)sqrt ((double)val);
+
+ delta=A/(val+B);
+
+
+ del=table[a-1][b];
+ ins=table[a][b-1];
+ sub= table[a-1][b-1]+delta;
+
+ if ( del >= ins && del >= sub)score=del;
+ else if ( ins >= del && ins >= sub) score=ins;
+ else score=sub;
+ table[a][b]=score;
+ }
+ }
+
+
+ score=(((score*100))/(max)-50);
+
+
+ return score;
+ }
+
+/*********************************************************************************************/
+/* */
+/* APDB */
+/* */
+/*********************************************************************************************/
+float **** irmsdmin_apdb_filtration ( Alignment *A, float ****residues, Constraint_list *CL, Pdb_param *PP, FILE *fp)
+{
+ int s1, s2, a,col1, n,n2=0, t,flag;
+ int **pos, **list;
+ float nirmsd, min_nirmsd,max_nirmsd,ref_sum, sum, sum2;
+ float **normalized_len;
+
+ normalized_len=declare_float (A->nseq+1, A->nseq+1);
+ for (s1=0; s1<A->nseq; s1++)
+ {
+ int l1, l2, r1, r2, p;
+ for (s2=0; s2<A->nseq; s2++)
+ {
+ for ( l1=l2=p=0; p< A->len_aln; p++)
+ {
+ r1=A->seq_al[s1][p];
+ r2=A->seq_al[s2][p];
+ if (!is_gap(r1) && isupper(r1))l1++;
+ if (!is_gap(r2) && isupper(r2))l2++;
+ }
+ normalized_len[s1][s2]=MIN(l1,l2);
+ }
+ }
+
+ pos=aln2pos_simple (A, A->nseq);
+ for ( s1=0; s1< A->nseq; s1++)
+ for ( s2=0; s2<A->nseq; s2++)
+ {
+ if ( s1==s2) continue;
+ else if (!(CL->T[A->order[s1][0]]) || !(CL->T[A->order[s2][0]]))continue;
+
+ list=declare_int (A->len_aln, 2);
+
+ for ( sum=0,n=0,col1=0; col1< A->len_aln; col1++)
+ {
+ if ( islower (A->seq_al[s1][col1]) || islower ( A->seq_al[s2][col1]))continue;
+ else if ( pos[s1][col1]<=0 || pos[s2][col1]<=0 ) continue;
+ else if ( residues[s1][s2][pos[s1][col1]-1][0]==0)continue;
+
+ list[n][0]=pos[s1][col1]-1;
+ list[n][1]=(int)100000*residues[s1][s2][pos[s1][col1]-1][4];
+ sum2+=residues[s1][s2][pos[s1][col1]-1][4];
+ n++;
+ }
+
+ if (n==0)return residues;
+
+ sort_int_inv (list, 2, 1,0, n-1);
+ for (sum=0,a=0; a<n; a++)
+ {
+ sum+=list[a][1];
+ }
+ ref_sum=sum;
+ nirmsd=min_nirmsd=max_nirmsd=sum/(n*n);
+ t=0;
+
+
+ /*1 Find the maximum*/
+ sum=ref_sum;
+ for (flag=0,a=0; a< n-1; a++)
+ {
+ sum-=list[a][1];
+ nirmsd=sum/((n-(a+1))*(n-(a+1)));
+ if (nirmsd<max_nirmsd)flag=1;
+ if ((nirmsd>=max_nirmsd) && flag==1)break;
+ n2=a;
+ }
+
+ sum=ref_sum;
+ for (a=0; a<n2-1; a++)
+ {
+ sum-=list[a][1];
+ nirmsd=sum/((n-(a+1))*(n-(a+1)));
+
+
+ if ( nirmsd<min_nirmsd)
+ {
+ min_nirmsd=nirmsd;
+ t=a;
+ if ( PP->nirmsd_graph)
+ {
+ fprintf ( stdout, "\n_NIRMSD_GRAPH %s %s POS: %4d Removed: %4d NiRMSD: %.2f", A->name[s1], A->name[s2], list[a][0],a,(nirmsd/100000)*normalized_len[s1][s2]);
+ }
+ }
+ }
+
+ if ( PP->print_rapdb)
+ {
+ for ( a=0; a<n; a++)
+ {
+ if ( list[a][1]>0 && a<=t)fprintf ( stdout, "\nRAPDB QUANTILE REMOVE S1: %3d S2: %3d COL: %3d SCORE*100: %d", s1, s2, list[a][0], list[a][1]);
+ else if ( list[a][1]>0 && a>t)fprintf ( stdout, "\nRAPDB QUANTILE KEEP S1: %3d S2: %3d COL: %3d SCORE*100: %d", s1, s2, list[a][0], list[a][1]);
+ }
+ }
+
+ fprintf ( stdout, "\n# MINIMISATION FILTER ON: NiRMSD minimsation resulted in the removal of %d [out of %d] Columns On the alignment %s Vs %s\n", t, n, A->name[s1], A->name[s2]);
+ for ( a=0; a<=t; a++)
+ {
+
+ residues[s1][s2][list[a][0]][0]=0;
+ residues[s1][s2][list[a][0]][1]=0;
+ residues[s1][s2][list[a][0]][2]=0;
+ residues[s1][s2][list[a][0]][3]=0;
+ residues[s1][s2][list[a][0]][4]=-1;
+
+ }
+
+ free_int (list, -1);
+ }
+ free_float (normalized_len, -1);
+ return residues;
+}
+float **** quantile_apdb_filtration ( Alignment *A, float ****residues, Constraint_list *CL, Pdb_param *PP,FILE *fp)
+{
+ int s1, s2, a,col1, n, t;
+ int **pos, **list;
+
+ pos=aln2pos_simple (A, A->nseq);
+ for ( s1=0; s1< A->nseq; s1++)
+ for ( s2=0; s2<A->nseq; s2++)
+ {
+ if ( s1==s2) continue;
+ else if (!(CL->T[A->order[s1][0]]) || !(CL->T[A->order[s2][0]]))continue;
+
+ list=declare_int (A->len_aln, 2);
+
+ for ( n=0,col1=0; col1< A->len_aln; col1++)
+ {
+ if ( islower (A->seq_al[s1][col1]) || islower ( A->seq_al[s2][col1]))continue;
+ else if ( pos[s1][col1]<=0 || pos[s2][col1]<=0 ) continue;
+
+ list[n][0]=pos[s1][col1]-1;
+ list[n][1]=(int)100*residues[s1][s2][pos[s1][col1]-1][4];
+ n++;
+
+ }
+
+ sort_int_inv (list, 2, 1,0, n-1);
+
+ t=quantile_rank ( list,1, n,PP->filter);
+
+ if ( PP->print_rapdb)
+ {
+ for ( a=0; a<n; a++)
+ {
+ if ( list[a][1]>0 && a<t)fprintf ( stdout, "\nRAPDB QUANTILE REMOVE S1: %3d S2: %3d COL: %3d SCORE*100: %d", s1, s2, list[a][0], list[a][1]);
+ else if ( list[a][1]>0 && a>t)fprintf ( stdout, "\nRAPDB QUANTILE KEEP S1: %3d S2: %3d COL: %3d SCORE*100: %d", s1, s2, list[a][0], list[a][1]);
+ }
+ }
+
+ for ( a=0; a<t; a++)
+ {
+
+ residues[s1][s2][list[a][0]][0]=0;
+ residues[s1][s2][list[a][0]][1]=0;
+ residues[s1][s2][list[a][0]][2]=0;
+ residues[s1][s2][list[a][0]][3]=0;
+ residues[s1][s2][list[a][0]][4]=-1;
+
+ }
+
+ free_int (list, -1);
+ }
+
+ return residues;
+}
+Alignment * analyse_pdb ( Alignment *A, Alignment *ST, char *results)
+ {
+ int s1,s2,r1, r2,b, p;
+ int **pos;
+ float **normalize_len;
+ float m2, m4;
+ float pair_tot=0, pair_m1, pair_m2, pair_m3, pair_m4, pair_m5, pair_len=0;
+ float seq_tot, seq_m1, seq_m2, seq_m3, seq_m4, seq_m5,seq_len;
+ float msa_tot, msa_m1, msa_m2, msa_m3, msa_m4, msa_m5, msa_len;
+ float iRMSD_unit, iRMSD_max, iRMSD_min;
+ float ****residues;
+ Pdb_param *PP=NULL;
+ Constraint_list *CL;
+ char *average_file, *pairwise_file, *total_file, *irmsd_file=0;
+ FILE *fp, *average,*pairwise, *total, *irmsd_graph=0;
+
+
+ fp =vfopen ( results, "w");
+ pairwise=vfopen ((pairwise_file=vtmpnam (NULL)),"w");
+ average =vfopen ((average_file =vtmpnam (NULL)),"w");
+ total =vfopen ((total_file =vtmpnam (NULL)),"w");
+
+
+ CL=A->CL;
+
+ for ( s1=0; s1< (A->S)->nseq; s1++)
+ if ( CL->T[s1]){PP=(CL->T[s1])->pdb_param;break;}
+
+ if (PP->irmsd_graph)irmsd_graph =vfopen ((irmsd_file =vtmpnam (NULL)),"w");
+
+ fprintf ( fp, "\nAPDB_RESULT_FORMAT_02\n");
+ residues=analyse_pdb_residues ( A, A->CL,PP);
+ if ( PP->filter>=0)residues=quantile_apdb_filtration (A, residues, A->CL,PP, fp);
+ else if ( PP->filter<0)residues=irmsdmin_apdb_filtration (A, residues, A->CL,PP, fp);
+
+ pos=aln2pos_simple (A, A->nseq);
+
+
+
+
+
+ /*Compute the alignment length for normalization*/
+ normalize_len=declare_float (A->nseq+1, A->nseq+1);
+ for (s1=0; s1<A->nseq; s1++)
+ {
+ int l1, l2, r1, r2;
+ for (s2=0; s2<A->nseq; s2++)
+ {
+ for ( l1=l2=p=0; p< A->len_aln; p++)
+ {
+ r1=A->seq_al[s1][p];
+ r2=A->seq_al[s2][p];
+ if (!is_gap(r1) && isupper(r1))l1++;
+ if (!is_gap(r2) && isupper(r2))l2++;
+ }
+ normalize_len[s1][s2]=MIN(l1,l2);
+ }
+ }
+
+ msa_len=msa_tot=msa_m1=msa_m2=msa_m3=msa_m4=msa_m5=0;
+
+ for ( s1=0; s1< A->nseq; s1++)
+ {
+ if ( !(CL->T[A->order[s1][0]]))continue;
+ seq_len=seq_tot=seq_m1=seq_m2=seq_m3=seq_m4=seq_m5=0;
+ for ( s2=0; s2< A->nseq; s2++)
+ {
+ if ( s1==s2)continue;
+ if ( !(CL->T[A->order[s2][0]]))continue;
+ pair_tot=pair_m1=pair_m2=pair_m3=pair_m4=pair_m5=0;
+ for ( p=0; p< A->len_aln; p++)
+ {
+ r1=A->seq_al[s1][p];
+ r2=A->seq_al[s2][p];
+ b=pos[s1][p]-1;
+
+
+ if (PP->filter_aln)
+ {
+ if (is_gap(r1) || is_gap(r2) || residues[s1][s2][b][0]==0)
+ {
+ A->seq_al[s1][p]=tolower(r1);
+ A->seq_al[s2][p]=tolower(r2);
+ }
+ else
+ {
+ A->seq_al[s1][p]=toupper(r1);
+ A->seq_al[s2][p]=toupper(r2);
+ }
+
+ }
+
+ if ( PP->irmsd_graph && ( is_gap(r1) || is_gap(r2) || residues[s1][s2][b][0]==0))
+ {
+
+ fprintf ( irmsd_graph, "\n_IRMSD_GRAPH %10s %10s ALN: %c%c iRMSD: -1.00", A->name[s1], A->name[s2],A->seq_al[s1][p], A->seq_al[s2][p]);
+ }
+
+ if (is_gap(r1) || is_gap(r2) || residues[s1][s2][b][0]==0)continue;
+ pair_tot++;
+
+ /*APDB*/
+ m2=(residues[s1][s2][b][2]*100)/residues[s1][s2][b][0];
+ if (m2>PP->similarity_threshold){pair_m3++;}
+
+ /*iRMSD*/
+
+ m4=residues[s1][s2][b][4];
+
+ if ( PP->irmsd_graph )
+ {
+ fprintf ( irmsd_graph, "\nIRMSD_GRAPH %10s %10s ALN: %c%c iRMSD: %.2f", A->name[s1], A->name[s2],A->seq_al[s1][p], A->seq_al[s2][p], m4);
+ }
+ pair_m4+=m4;
+ }
+ pair_len=normalize_len[s1][s2];
+ if ( s1>s2)
+ {
+
+ fprintf ( pairwise, "\n\n#PAIRWISE: %s Vs %s",A->name[s1], A->name[s2]);
+ fprintf ( pairwise, "\n\tPAIRWISE EVALUATED: %6.2f %% [%s Vs %s] ", (pair_len==0)?-1:(pair_tot*100)/pair_len,A->name[s1], A->name[s2]);
+ fprintf ( pairwise, "\n\tPAIRWISE APDB: %6.2f %% [%s Vs %s] ", (pair_tot==0)?-1:(pair_m3*100)/pair_tot,A->name[s1], A->name[s2]);
+ fprintf ( pairwise, "\n\tPAIRWISE iRMSD: %6.2f Angs [%s Vs %s]", (pair_tot==0)?-1:pair_m4/pair_tot,A->name[s1], A->name[s2]);
+ fprintf ( pairwise, "\n\tPAIRWISE NiRMSD: %6.2f Angs [%s Vs %s] [%d pos]", (pair_tot==0)?-1:(pair_m4*pair_len)/(pair_tot*pair_tot), A->name[s1], A->name[s2], (int)pair_tot);
+ fprintf ( pairwise, "\n\tRAPDB PAIRS PAIRWISE N_NONEMPTY_PAIRS %d N_MAXIMUM_PAIRS %d",(int) pair_tot, (int)pair_len);
+ msa_m3+=pair_m3;
+ msa_m4+=pair_m4;
+ msa_tot+=pair_tot;
+ msa_len+=pair_len;
+ }
+ seq_m3+=pair_m3;
+ seq_m4+=pair_m4;
+ seq_tot+=pair_tot;
+ seq_len+=pair_len;
+
+ }
+
+ fprintf ( average, "\n\n#AVERAGE For Sequence %s", A->name[s1]);
+ fprintf ( average, "\n\tAVERAGE EVALUATED: %6.2f %% [%s]", (seq_len==0)?-1:(seq_tot*100)/seq_len, A->name[s1]);
+ fprintf ( average, "\n\tAVERAGE APDB: %6.2f %% [%s]", (seq_tot==0)?-1:(seq_m3*100)/seq_tot, A->name[s1]);
+ fprintf ( average, "\n\tAVERAGE iRMSD: %6.2f Angs [%s]", (seq_tot==0)?-1:seq_m4/seq_tot, A->name[s1]);
+ fprintf ( average, "\n\tAVERAGE NiRMS: %6.2f Angs [%s]", (seq_tot==0)?-1:(seq_m4*seq_len)/(seq_tot*seq_tot), A->name[s1]);
+ if ( strm (PP->color_mode, "apdb"))ST->score_seq[s1]=(seq_tot==0)?-1:(seq_m3*100)/pair_tot;
+ if (PP->print_rapdb)fprintf (average, "\n\tRAPDB PAIRS AVERAGE N_NONEMPTY_PAIRS %d N_MAXIMUM_PAIRS %d", (int)pair_tot, (int)pair_len);
+
+ if ( strm (PP->color_mode, "irmsd"))ST->score_seq[s1]=(seq_tot==0)?-1:10*((seq_m4*pair_len)/(seq_tot*seq_tot));
+
+ }
+ fprintf ( total, "\n\n#TOTAL for the Full MSA");
+ fprintf ( total, "\n\tTOTAL EVALUATED: %6.2f %% ", (msa_len==0)?-1:(msa_tot*100)/msa_len);
+ fprintf ( total, "\n\tTOTAL APDB: %6.2f %% ", (msa_tot==0)?-1:(msa_m3*100)/msa_tot);
+ fprintf ( total, "\n\tTOTAL iRMSD: %6.2f Angs", (msa_tot==0)?-1:msa_m4/msa_tot);
+ fprintf ( total, "\n\tTOTAL NiRMSD: %6.2f Angs", (msa_tot==0)?-1:(msa_m4*msa_len)/(msa_tot*msa_tot));
+ if (PP->print_rapdb)fprintf (total, "\n\tRAPDB PAIRS TOTAL N_NONEMPTY_PAIRS: %d N_MAXIMUM_PAIRS %d", (int)msa_tot, (int)msa_len);
+
+ if ( strm (PP->color_mode, "apdb")) ST->score_aln=ST->score=A->score_aln=A->score=(msa_tot==0)?-1:(msa_m3*100)/msa_tot;
+ if ( strm (PP->color_mode, "irmsd"))ST->score_aln=ST->score=A->score_aln=A->score=(msa_tot==0)?-1:10*((msa_m4*msa_len)/(msa_tot*msa_tot));
+
+ vfclose (average);vfclose (total); vfclose (pairwise);if (PP->irmsd_graph)vfclose (irmsd_graph);
+ fp=display_file_content (fp, pairwise_file);
+ fp=display_file_content (fp, average_file);
+ fp=display_file_content (fp, total_file);
+ if ( PP->irmsd_graph)fp=display_file_content (fp, irmsd_file);
+
+ fprintf ( fp, "\n\n# EVALUATED: Fraction of Pairwise Columns Evaluated\n");
+ fprintf ( fp, "# APDB: Fraction of Correct Columns according to APDB\n");
+ fprintf ( fp, "# iRMDS: Average iRMSD over all evaluated columns\n");
+ fprintf ( fp, "# NiRMDS: iRMSD*MIN(L1,L2)/Number Evaluated Columns\n");
+ fprintf ( fp, "# Main Parameter: -maximum_distance %.2f Angstrom\n", PP->maximum_distance);
+
+ fprintf ( fp, "# Undefined values are set to -1 and indicate LOW Alignment Quality\n");
+ fp=print_program_information (fp, NULL);
+
+
+
+
+ /*Color Output*/
+ for (iRMSD_max=0,iRMSD_min=10000,s1=0; s1<A->nseq; s1++)
+ for ( s2=0; s2< A->nseq; s2++)
+ for (p=0; p<A->len_aln; p++)
+ {
+ if ( residues[s1][s2][p][4]>0)
+ {
+ iRMSD_max=MAX(iRMSD_max, residues[s1][s2][p][4]);
+ iRMSD_min=MAX(iRMSD_min, residues[s1][s2][p][4]);
+ }
+
+ }
+ iRMSD_unit=iRMSD_max/8;
+
+ for (p=0; p< A->len_aln; p++)
+ for ( s1=0; s1< A->nseq; s1++)
+ {
+
+ for ( p=0; p< A->len_aln; p++)
+ {
+ r1=A->seq_al[s1][p];
+ b=pos[s1][p]-1;
+ if ( is_gap(r1) || !(CL->T[A->order[s1][0]]))
+ ST->seq_al[s1][p]=NO_COLOR_RESIDUE;
+ else
+ {
+ float tot_m2=0, tot_m4=0, v=0;
+ seq_m2=seq_m4=0;
+
+ for (s2=0; s2< A->nseq; s2++)
+ {
+ r2=A->seq_al[s1][p];
+ if ( s1==s2) continue;
+ if (is_gap(r2) || !(CL->T[A->order[s1][0]]) || residues[s1][s2][b][0]==0)continue;
+
+ seq_m2+=m2=(residues[s1][s2][b][2]*100)/residues[s1][s2][b][0];
+ tot_m2++;
+
+ m4=residues[s1][s2][b][4];
+ if (m4>=0)
+ {
+ seq_m4+=m4;
+ tot_m4++;
+ }
+ }
+
+ if (strm ( PP->color_mode, "apdb"))
+ {
+ if (tot_m2==0)v=NO_COLOR_RESIDUE;
+ else v=MIN((seq_m2/(10*tot_m2)),9);
+ }
+ else if ( strm (PP->color_mode, "irmsd"))
+ {
+ if ( tot_m4==0)v=NO_COLOR_RESIDUE;
+ else v=(8-(int)((seq_m4/(iRMSD_unit*tot_m4))))+1;
+ }
+ ST->seq_al[s1][p]=v;
+
+ }
+ }
+ }
+ for ( p=0; p<A->len_aln; p++) ST->seq_al[A->nseq][p]=NO_COLOR_RESIDUE;
+
+
+ ST->generic_comment=vcalloc ( 100, sizeof (int));
+ if ( strm (PP->color_mode, "apdb"))
+ {
+ sprintf ( ST->generic_comment, "# APDB Evaluation: Color Range Blue-[0 %% -- 100 %%]-Red\n# Sequence Score: APDB\n# Local Score: APDB\n\n");
+ }
+ else if ( strm (PP->color_mode, "irmsd"))
+ {
+ sprintf ( ST->generic_comment, "\n# iRMSD Evaluation:\n# Sequence score: NiRMSD (Angstrom*10)\n# Local Score: iRMSD, Blue-[%.2f Ang. -- 0.00 Ang.]-Red \n", iRMSD_max);
+ }
+
+ fprintf ( fp, "\n");
+ vfclose (fp);
+ free_int (pos, -1);
+ return ST;
+ }
+float **** analyse_pdb_residues ( Alignment *A, Constraint_list *CL, Pdb_param *pdb_param)
+ {
+
+ int **pos;
+ int s1, s2, rs1, rs2;
+ int col1, col2;
+ float ****distances;
+
+ /*Distances[Nseq][len_aln][4]
+ distances...[0]: Number of residues within the bubble
+ distances...[1]: Absolute difference of distance of residues within Bubble
+ distances...[2]: Number of residues within the bubble with Delta dist < md_threshold
+ distances ..[3]: Sum of squared difference of distances
+ distances ..[4]: iRMSD
+ */
+ float d1, d2,delta;
+ int wd1, wd2;
+ int in_bubble=0;
+ int real_res1_col1=0;
+ int real_res1_col2;
+ int real_res2_col1;
+ int real_res2_col2;
+ Pdb_param *PP;
+ int print_rapdb;
+ float nrapdb, rapdb;
+ Alignment *BA=NULL;
+
+ PP=pdb_param;
+ print_rapdb=PP->print_rapdb;
+
+ distances=declare_arrayN(4, sizeof (float), A->nseq, A->nseq, 0, 0);
+
+ /*Pre-computation of the internal distances----> T[seq]->ca_dist[len][len]*/
+ /*Can be avoided if distance_on_request set to 1 */
+
+ for ( s1=0; s1< A->nseq; s1++)
+ {
+ rs1=A->order[s1][0];
+ if (CL->T[rs1] && !(CL->T[rs1])->ca_dist)(CL->T[rs1])->ca_dist=measure_ca_distances(CL->T[rs1]);
+ for ( s2=0; s2< A->nseq; s2++)
+ {
+ distances[s1][s2]=declare_float ( A->len_aln, 6);
+ }
+ }
+ pos=aln2pos_simple (A, A->nseq);
+
+ for ( s1=0; s1< A->nseq; s1++)
+ for ( col1=0; col1< A->len_aln; col1++)
+ for ( s2=0; s2<A->nseq; s2++)
+ {
+ rs1=A->order[s1][0];
+ rs2=A->order[s2][0];
+ rapdb=0;
+ nrapdb=0;
+ if ( s1==s2) continue;
+ else if (!(CL->T[rs1]) || !(CL->T[rs2]))continue;
+ else if ( islower (A->seq_al[s1][col1]) || islower ( A->seq_al[s2][col1]))continue;
+ else if ( pos[s1][col1]<=0 || pos[s2][col1]<=0 ) continue;
+
+ if ( print_rapdb && s2>s1)
+ {
+
+ fprintf ( stdout, "RAPDB S1: %s S2: %s POS %d %d %c %d %c ", A->name[s1], A->name[s2], col1+1, pos[s1][col1],A->seq_al[s1][col1], pos[s2][col1],A->seq_al[s2][col1]);
+ BA=copy_aln (A, BA);
+ lower_string (BA->seq_al[s1]);
+ lower_string (BA->seq_al[s2]);
+ BA->seq_al[s1][col1]=toupper (BA->seq_al[s1][col1]);
+ BA->seq_al[s2][col1]=toupper (BA->seq_al[s2][col1]);
+ }
+
+ for ( col2=0; col2<A->len_aln; col2++)
+ {
+
+ if (pos[s1][col2]<=0 || pos[s2][col2]<=0 )continue;
+ else if ( FABS((pos[s1][col2]-pos[s1][col1]))<=PP->n_excluded_nb)continue;
+ else if ( FABS((pos[s2][col2]-pos[s2][col1]))<=PP->n_excluded_nb)continue;
+ else if ( islower (A->seq_al[s1][col2]) || islower ( A->seq_al[s2][col2]))continue;
+
+ real_res1_col1=pos[s1][col1]-1;
+ real_res1_col2=pos[s1][col2]-1;
+
+ real_res2_col1=pos[s2][col1]-1;
+ real_res2_col2=pos[s2][col2]-1;
+
+ d1=(CL->T[rs1])->ca_dist[real_res1_col1][real_res1_col2];
+ d2=(CL->T[rs2])->ca_dist[real_res2_col1][real_res2_col2];
+
+ if ( d1==UNDEFINED || d2 == UNDEFINED) continue;
+
+
+
+ if ( strm ( PP->local_mode, "sphere"))
+ {
+ in_bubble= (d1<PP->maximum_distance && d2<PP->maximum_distance)?1:0; ;
+ }
+ else if ( strm ( PP->local_mode, "window"))
+ {
+ wd1=FABS((pos[s1][col2]-pos[s1][col1]));
+ wd2=FABS((pos[s2][col2]-pos[s2][col1]));
+ in_bubble= (wd1<PP->maximum_distance && wd2<PP->maximum_distance)?1:0; ;
+ }
+
+ if (in_bubble)
+ {
+ if ( print_rapdb && s2 >s1)
+ {
+ fprintf ( stdout, "NB %d %d %c %d %c ", col2, pos[s1][col2], A->seq_al[s1][col2], pos[s2][col2], A->seq_al[s2][col2]);
+ BA->seq_al[s1][col2]=toupper (BA->seq_al[s1][col2]);
+ BA->seq_al[s2][col2]=toupper (BA->seq_al[s2][col2]);
+ }
+ delta=FABS((d1-d2));
+ if (delta<PP->md_threshold)
+ distances[s1][s2][real_res1_col1][2]++;
+ distances[s1][s2][real_res1_col1][1]+=delta;
+ distances[s1][s2][real_res1_col1][0]++;
+ distances[s1][s2][real_res1_col1][3]+=delta*delta;
+ nrapdb++;
+ rapdb+=delta*delta;
+ }
+ }
+
+ if ( nrapdb==0)distances[s1][s2][real_res1_col1][4]=-1;
+ else distances[s1][s2][real_res1_col1][4]=(float)sqrt((double)(rapdb/nrapdb));
+
+ if ( print_rapdb && s2>s1)
+ {
+ if (nrapdb==0)
+ {
+ fprintf ( stdout, "APDB: UNDEFINED\n");
+ }
+ else
+ {
+
+ fprintf ( stdout, " APDB: %.2f ",(float)sqrt((double)(rapdb/nrapdb)));
+ BA->residue_case=KEEP_CASE;unalign_residues (BA, s1, s2);
+ fprintf ( stdout,"SEQ1: %s %s SEQ2: %s %s\n", BA->name[s1], BA->seq_al[s1], BA->name[s2], BA->seq_al[s2]);
+ }
+ }
+
+ }
+
+ free_aln (BA);
+ free_int (pos, -1);
+ return distances;
+ }
+int pair_res_suitable4trmsd (int s1,int col1, int col2, Alignment *A, int **pos, Pdb_param *PP, Constraint_list *CL,int *s);
+int aln_column_contains_gap (Alignment *A, int c);
+float aln2ncol4trmsd(Alignment *A, int **pos, Constraint_list *CL, int **lc);
+int pair_columns_suitable4trmsd(int col1, int col2, Alignment *A, int **pos, Pdb_param *PP, Constraint_list *CL,int *s);
+int column_is_suitable4trmsd(int col1,Alignment *A, int **pos, Pdb_param *PP, Constraint_list *CL,int *s);
+
+
+
+NT_node trmsdmat2tree (float **dm, int **count,Alignment *A);
+Alignment * msa2struc_dist ( Alignment *A, Alignment *ST, char *results, int gapped, int min_ncol4trmsd)
+ {
+
+ int **pos, c;
+ FILE *tl;
+ int s1, s2, rs1, rs2;
+ int col1, col2;
+ float ****distances;
+ float **dm,**tdm;
+ int **count,**tcount;
+ int print_subtrees=0;
+ float min, max;
+
+ /*Distances[Nseq][len_aln][4]
+ distances...[0]: Number of residues within the bubble
+ distances...[1]: Absolute difference of distance of residues within Bubble
+ distances...[2]: Number of residues within the bubble with Delta dist < md_threshold
+ distances ..[3]: Sum of squared difference of distances
+ distances ..[4]: iRMSD
+ */
+ Pdb_param *pdb_param;
+ Constraint_list *CL;
+ int a, b, ncol, npos,n;
+ float d1, d2,delta;
+ int wd1, wd2;
+ int in_bubble=0;
+ int real_res1_col1=0;
+ int real_res1_col2;
+ int real_res2_col1;
+ int real_res2_col2;
+ Pdb_param *PP;
+ int print_rapdb;
+ float nrapdb, rapdb;
+ Alignment *BA=NULL;
+ NT_node *T0,*T1,*T2,*PT, *POS;
+ NT_node BT0, BT10,BT50, BT100=NULL,RBT;
+ char **pair_pos_list;
+
+ int ntree=0, ntree2;
+
+ Alignment *B;
+ char *pos_list;
+ char *tot_pos_list;
+
+ char *struc_tree10;
+ char *struc_tree100;
+ char *struc_tree50;
+ char *struc_tree0;
+ char *consense_file;
+
+ char *color_struc_tree;
+ int **score;
+ int proceed=1;
+ int **lc;
+ int used;
+
+ if (min_ncol4trmsd<0)
+ {
+ min_ncol4trmsd*=-1;
+ min_ncol4trmsd=(min_ncol4trmsd*A->len_aln)/100;
+ }
+ else if ( min_ncol4trmsd>=A->len_aln)
+ {
+ min_ncol4trmsd=A->len_aln-1;
+ }
+
+ lc=declare_int (A->nseq, 2);
+ for (a=0; a<A->nseq; a++)lc[a][0]=a;
+
+ declare_name(tot_pos_list);
+ sprintf ( tot_pos_list, "%s.struc_tree.list", results);
+
+ declare_name(consense_file);
+ sprintf (consense_file, "%s.struc_tree.consense_output", results);
+
+ declare_name(pos_list);
+ sprintf ( pos_list, "%s.pos_list", results);
+
+ declare_name(struc_tree0);
+ sprintf ( struc_tree0, "%s.struc_tree.consensus",results);
+
+ declare_name(struc_tree10);
+ sprintf ( struc_tree10, "%s.struc_tree10",results);
+
+ declare_name(struc_tree100);
+ sprintf ( struc_tree100, "%s.struc_tree100",results);
+
+ declare_name(struc_tree50);
+ sprintf ( struc_tree50, "%s.struc_tree50",results);
+
+ declare_name(color_struc_tree);
+ sprintf ( color_struc_tree, "%s.struc_tree.html", results);
+
+ pair_pos_list=declare_char (A->len_aln*A->len_aln+1, 100);
+ T1=vcalloc (A->len_aln*A->len_aln+1, sizeof (NT_node));
+ T2=vcalloc (A->len_aln+1, sizeof (NT_node));
+
+ PT=vcalloc (A->len_aln*A->len_aln+1, sizeof (NT_node));
+ POS=vcalloc (A->len_aln+1, sizeof (NT_node));
+
+ CL=A->CL;
+
+ //Check all sequences have a PDB structure
+
+ for (used=0,a=0; a<A->nseq; a++)
+ {
+ if ( ! seq2P_template_file(A->S,a))
+ {
+ add_warning (stderr, "Sequence %s removed from the dataset [No Usable Structure]", A->name[a]);
+ }
+ else
+ {
+ if (used!=a)
+ {
+ sprintf (A->name[used], "%s", A->name[a]);
+ sprintf (A->seq_al[used], "%s", A->seq_al[a]);
+ for (b=0; b<4; b++)A->order[used][b]=A->order[a][b];
+ }
+ used++;
+ }
+ }
+
+ A->nseq=used;
+
+ if (A->nseq<2)myexit (fprintf_error(stderr, "Two sequences at least must have a known structure"));
+
+ for ( s1=0; s1< (A->S)->nseq; s1++)
+ if ( CL->T[s1]){PP=(CL->T[s1])->pdb_param;break;}
+
+ for ( s1=0; s1< A->nseq; s1++)
+ {
+ rs1=A->order[s1][0];
+ if (CL->T[rs1] && !(CL->T[rs1])->ca_dist)(CL->T[rs1])->ca_dist=measure_ca_distances(CL->T[rs1]);
+ }
+ pos=aln2pos_simple (A, A->nseq);
+
+ dm=declare_float (A->nseq, A->nseq);
+ count=declare_int (A->nseq, A->nseq);
+ tdm=declare_float (A->nseq, A->nseq);
+ tcount=declare_int (A->nseq, A->nseq);
+
+ PP->maximum_distance=1000;
+ sprintf ( PP->local_mode, "sphere");
+
+ while ((npos=aln2ncol4trmsd(A,pos,CL,lc))<min_ncol4trmsd && A->nseq>1)
+ {
+
+ sort_int_inv (lc,2, 1, 0,A->nseq-1);
+ add_information (stderr, "Remove Sequence [%s] that contains %d un-suitable positions", A->name[lc[0][0]], lc[0][1]);
+ A=remove_seq_from_aln (A, A->name[lc[0][0]]);
+ ungap_aln (A);
+ pos=aln2pos_simple (A, A->nseq);
+ }
+ if (!A->nseq)
+ {
+ myexit (fprintf_error(stderr,"No suitable pair of column supporting a tree"));
+ }
+ else
+ fprintf ( stderr, "\n---- Number of usable positions: %d [%.2f %%]\n", npos, ((float)npos*100)/(float)A->len_aln);
+
+ tl=vfopen (tot_pos_list, "w");
+ for (ncol=0,ntree=0, col1=0; col1< A->len_aln; col1++)
+ {
+ int w,tree, cont;
+ //output_completion (stderr, col1, A->len_aln,1, "Sample Columns");
+ if (!gapped && aln_column_contains_gap (A, col1))continue;
+ for ( cont=1,ntree2=0,col2=0; col2<A->len_aln; col2++)
+ {
+ for (s1=0; s1< A->nseq-1; s1++)
+ {
+ rs1=A->order[s1][0];
+ if (!pair_res_suitable4trmsd (s1,col1, col2, A, pos, PP, CL, &w))continue;
+ for ( s2=s1+1; s2<A->nseq; s2++)
+ {
+ if (!pair_res_suitable4trmsd (s2,col1, col2, A, pos, PP, CL, &w))continue;
+
+ rs2=A->order[s2][0];
+ real_res1_col1=pos[s1][col1]-1;
+ real_res1_col2=pos[s1][col2]-1;
+ real_res2_col1=pos[s2][col1]-1;
+ real_res2_col2=pos[s2][col2]-1;
+
+ d1=(CL->T[rs1])->ca_dist[real_res1_col1][real_res1_col2];
+ d2=(CL->T[rs2])->ca_dist[real_res2_col1][real_res2_col2];
+
+ delta=FABS((d1-d2));
+ dm[s1][s2]=dm[s2][s1]+=delta;
+ tdm[s1][s2]=tdm[s2][s1]+=delta;
+ tcount[s1][s2]++;
+ tcount[s2][s1]++;
+
+ count[s1][s2]++;
+ count[s2][s1]++;
+ }
+ }
+ }
+
+
+
+ if ((POS[col1]=trmsdmat2tree (dm, count, A)))
+ {
+ T1[ntree]=POS[col1];
+ fprintf (tl, "\n>Tree_%d Column\n", col1+1);
+ print_tree (T1[ntree], "newick", tl);
+ ntree++;
+ }
+ }
+ vfclose (tl);
+
+ if (!ntree){fprintf ( stderr, "\nERROR: No suitable pair of column supporting a tree [FATAL]\n"); exit (EXIT_SUCCESS);}
+
+ score=treelist2avg_treecmp (T1, NULL);
+ display_output_filename( stderr,"TreeList","newick",tot_pos_list, CHECK);
+
+ if (treelist_file2consense (tot_pos_list, NULL, consense_file))
+ {
+ display_output_filename( stderr,"ConsenseTree","phylip",consense_file, CHECK);
+ }
+ else
+ {
+ fprintf ( stderr, "\nPhylip is not installed: the program could not produce the consense output. This is not mandatory but useful");
+ }
+
+ //consensus tree
+
+ if ((BT100=treelist2filtered_bootstrap (T1, NULL,score, 1.0)))
+ {
+ vfclose (print_tree (BT100,"newick", vfopen (struc_tree0, "w")));
+ display_output_filename( stderr,"Tree","newick",struc_tree0, CHECK);
+ }
+ if (print_subtrees)
+ {
+
+ if ( (BT0=trmsdmat2tree (tdm, tcount, A)))
+ {
+ vfclose (print_tree (BT0,"newick", vfopen (struc_tree0, "w")));
+ display_output_filename( stderr,"Tree","newick",struc_tree0, CHECK);
+ }
+ if ((BT10=treelist2filtered_bootstrap (T1, NULL,score, 0.1)))
+ {
+ vfclose (print_tree (BT10,"newick", vfopen (struc_tree10, "w")));
+ display_output_filename( stderr,"Tree","newick",struc_tree10, CHECK);
+ }
+
+ if ((BT50=treelist2filtered_bootstrap (T1, NULL, score,0.5)))
+ {
+ vfclose (print_tree (BT50,"newick", vfopen (struc_tree50, "w")));
+ display_output_filename( stderr,"Tree","newick",struc_tree50, CHECK);
+ }
+ }
+
+
+ if (!BT100)BT100=treelist2filtered_bootstrap (T1, NULL,score, 1.0);
+
+ RBT=BT100;
+ if (RBT)
+ {
+ B=copy_aln (A, NULL);
+ for (a=0; a<A->len_aln; a++)
+ {
+ int score;
+ Tree_sim *S=NULL;
+
+ if (POS[a])
+ {
+ S=tree_cmp (POS[a], RBT);
+ score=S->uw/10;
+ }
+ else
+ {
+ score=NO_COLOR_RESIDUE;
+ }
+
+ for (b=0; b<B->nseq; b++)
+ {
+ if ( is_gap (B->seq_al[b][a]) || score == NO_COLOR_RESIDUE)
+ {
+ B->seq_al[b][a]=NO_COLOR_RESIDUE;
+ }
+ else
+ {
+ B->seq_al[b][a]=S->uw/10;
+ }
+ }
+ if (S)vfree (S);
+ }
+
+ output_format_aln ("score_html", A,B,color_struc_tree);
+ display_output_filename( stderr,"Colored MSA","score_html",color_struc_tree, CHECK);
+ free_aln (BA);
+ fprintf ( stderr, "\n");
+ }
+ fprintf ( stderr, "\n");
+ free_int (pos, -1);
+ exit (EXIT_SUCCESS);
+ return NULL;
+ }
+NT_node trmsdmat2tree (float **dm, int **count,Alignment *A)
+{
+ float min, max;
+ int s1, s2;
+ NT_node T;
+ int ns;
+ int **dm_int;
+
+ ns=A->nseq;
+ for (s1=0; s1<ns-1; s1++)
+ for (s2=s1+1; s2<ns; s2++)
+ {
+ if ( count [s1][s2])dm[s1][s2]=dm[s2][s1]=dm[s1][s2]/(float)count[s1][s2];
+ else
+ {
+ return NULL;
+ }
+ if (s1==0 && s2==1)min=max=dm[s1][s2];
+ min=MIN(dm[s1][s2], min);
+ max=MAX(dm[s1][s2], max);
+ }
+ dm_int=declare_int (ns, ns);
+ for (s1=0; s1<A->nseq-1; s1++)
+ for (s2=s1+1; s2<A->nseq; s2++)
+ {
+ dm_int[s1][s2]=dm_int[s2][s1]=((dm[s1][s2])/(max))*100;
+ }
+ T=compute_std_tree_2(A, dm_int, "_TMODE_upgma");
+ free_int (dm_int, -1);
+ for (s1=0; s1<ns; s1++)for ( s2=0; s2<ns; s2++){dm[s1][s2]=count[s1][s2]=0;}
+ return T;
+}
+
+int pair_res_suitable4trmsd (int s1,int col1, int col2, Alignment *A, int **pos, Pdb_param *PP, Constraint_list *CL,int *s)
+{
+ int rs;
+ rs=A->order[s1][0];
+ if ( !(CL->T[rs])){s[0]=s1; return 0;}
+ else if (is_gap (A->seq_al[s1][col1])){s[0]=s1;return 0;}
+ else if (is_gap (A->seq_al[s1][col2])){s[0]=s1;return 0;}
+
+ else if (islower(A->seq_al[s1][col1])){s[0]=s1; return 0;}
+ else if (islower(A->seq_al[s1][col2])){s[0]=s1; return 0;}
+
+ else if ( FABS(((pos[s1][col2])-(pos[s1][col1])))<=PP->n_excluded_nb){s[0]=s1;return 0;}
+ else if ((CL->T[rs])->ca_dist[pos[s1][col1]-1][pos[s1][col2]-1]==UNDEFINED){s[0]=s1;return 0;}
+ return 1;
+}
+int pair_columns_suitable4trmsd(int col1, int col2, Alignment *A, int **pos, Pdb_param *PP, Constraint_list *CL,int *s)
+{
+ int s1;
+ if (!column_is_suitable4trmsd (col1, A, pos, PP, CL,s))return 0;
+ if (!column_is_suitable4trmsd (col2, A, pos, PP, CL,s))return 0;
+ for (s1=0; s1<A->nseq; s1++)
+ {
+ int rs, rr1, rr2;
+
+ rs=A->order[s1][0];
+ if ( FABS(((pos[s1][col2])-(pos[s1][col1])))<=PP->n_excluded_nb){s[0]=s1;return 0;}
+ if ((CL->T[rs])->ca_dist[pos[s1][col1]-1][pos[s1][col2]-1]==UNDEFINED){s[0]=s1;return 0;}
+ rr1=pos[s1][col1]-1;
+ rr2=pos[s1][col2]-1;
+ if ((CL->T[rs])->ca_dist[rr1][rr2]>PP->maximum_distance){s[0]=s1;return 0;}
+ }
+ return 1;
+}
+int column_is_suitable4trmsd(int col1,Alignment *A, int **pos, Pdb_param *PP, Constraint_list *CL,int *s)
+{
+ int s1;
+ for ( s1=0; s1<A->nseq; s1++)
+ {
+ int rs;
+ rs=A->order[s1][0];
+ if ( !(CL->T[rs])){s[0]=s1; return 0;}
+ else if (is_gap (A->seq_al[s1][col1])){s[0]=s1;return 0;}
+ else if (islower(A->seq_al[s1][col1])){s[0]=s1; return 0;}
+ }
+ return 1;
+}
+int aln_column_contains_gap (Alignment *A, int c)
+{
+ int a, b;
+ if ( !A || c>=A->len_aln || c<0)
+ {
+ printf ( "\nERROR: values out of range in aln_column_contains_gap [FATL:%s]\n", PROGRAM);
+ exit (EXIT_FAILURE);
+ }
+ for ( a=0; a<A->nseq; a++) if ( is_gap(A->seq_al[a][c]))return 1;
+ return 0;
+}
+
+
+float aln2ncol4trmsd(Alignment *A, int **pos, Constraint_list *CL, int **lc)
+{
+ //This function estimates the number of columns suitable for constructing a trmsd
+ int col1, s1, ncol, n, rs1, real_res1_col1;
+
+ for (s1=0; s1<A->nseq; s1++){lc[s1][0]=s1; lc[s1][1]=0;}
+ for (ncol=0,col1=0; col1< A->len_aln; col1++)
+ {
+ for (n=0,s1=0; s1<A->nseq; s1++)
+ {
+ real_res1_col1=pos[s1][col1]-1;
+ rs1=A->order[s1][0];
+
+ if (real_res1_col1<0)lc[s1][1]++;
+ else if (!((CL->T[A->order[s1][0]])->ca[real_res1_col1]))lc[s1][1]++;
+ else n++;
+ }
+ if (n==A->nseq)
+ {
+ ncol++;
+ }
+ }
+ return ncol;
+}
+
+float square_atom ( Atom *X)
+{
+
+ return X->x*X->x + X->y*X->y + X->z*X->z;
+}
+Atom* reframe_atom ( Atom *X, Atom*Y, Atom *Z, Atom *IN, Atom *R)
+ {
+ float new_x, new_y, new_z;
+
+ if ( R==NULL)R=vcalloc ( 1, sizeof (Atom));
+
+
+ new_x= X->x*IN->x + Y->x*IN->y +Z->x*IN->z;
+ new_y= X->y*IN->x + Y->y*IN->y +Z->y*IN->z;
+ new_z= X->z*IN->x + Y->z*IN->y +Z->z*IN->z;
+
+ R->x=new_x;
+ R->y=new_y;
+ R->z=new_z;
+ return R;
+ }
+
+Atom* add_atom ( Atom *A, Atom*B, Atom *R)
+{
+ if ( R==NULL)R=vcalloc ( 1, sizeof (Atom));
+
+ R->x=A->x+B->x;
+ R->y=A->y+B->y;
+ R->z=A->z+B->z;
+
+ return R;
+}
+Atom* diff_atom ( Atom *A, Atom*B, Atom *R)
+{
+ if ( R==NULL)R=vcalloc ( 1, sizeof (Atom));
+
+ R->x=A->x-B->x;
+ R->y=A->y-B->y;
+ R->z=A->z-B->z;
+
+ return R;
+}
+
+Atom * copy_atom ( Atom *A, Atom*R)
+{
+ if ( R==NULL)R=vcalloc ( 1, sizeof (Atom));
+ R->num=A->num;
+ R->res_num=A->res_num;
+ R->x=A->x;
+ R->y=A->y;
+ R->z=A->z;
+
+ sprintf( R->type, "%s", A->type);
+ return R;
+}
+ void print_atom (Atom *A)
+{
+ fprintf ( stdout, "%.2f %.2f %.2f", A->x, A->y, A->z);
+}
+/************************************************************************/
+/* */
+/* NUSSINOV */
+/* */
+/************************************************************************/
+
+/*---------prototypes ----------*/
+static void computeBasePairMatrix(int**M,char*S,int l, int T);
+static int backtrack(int a,int b,int**M,char*S,char*P, int T);
+
+
+
+static int basePair(char x, char y)
+{
+ static short **mat;
+
+ if (!mat)
+ {
+ char alp[20];
+ int a, b, c1, c2, lc1, lc2;
+ mat=declare_short (256, 256);
+ sprintf ( alp, "AGCTUagctu");
+ for (a=0; a<strlen (alp); a++)
+ {
+ for (b=a; b<strlen (alp)-1; b++)
+ {
+ c1=alp[a];c2=alp[b];
+ lc1=tolower(c1); lc2=tolower(c2);
+ if ( lc1=='g' && lc2=='c')
+ mat[c1][c2]=1;
+ else if ( lc1=='a' && lc2=='u')
+ mat[c1][c2]=1;
+ else if ( lc1=='u' && lc2=='g')
+ mat [c1][c2]=1;
+ mat[c2][c1]=mat[c1][c2];
+ }
+ }
+ }
+ return (int)mat[(int)x][(int)y];
+}
+
+
+
+/* ------------------------------------------------------------ */
+
+char *nussinov(char *S, int THRESHOLD)
+{
+ char *paren;
+ int i;
+
+ /*-------------------------------
+ S is RNA sequence
+ paren is parenthesis expression for
+ optimal RNA secondary structure
+ THRESHOLD: Min distance between two paired residues
+ -------------------------------*/
+
+ int **numBasePairs;
+ int n;
+
+ /*----- initialization --*/
+ n = strlen(S);
+ paren=vcalloc (n+1, sizeof (char));
+ numBasePairs=declare_int (n,n);
+
+ for (i=0;i<n;i++) paren[i]='.';
+ paren[n]='\0'; // paren is string of same length as S
+ computeBasePairMatrix(numBasePairs,S,n, THRESHOLD);
+ backtrack(0,n-1,numBasePairs,S,paren, THRESHOLD);
+ free_int (numBasePairs, -1);
+ return paren;
+}
+
+static void computeBasePairMatrix(int** numBasePairs,char *S,int n, int THRESHOLD)
+{
+ int i,j,d,k,max,val,index;
+
+ for (d = THRESHOLD; d < n; d++){
+ for(i=0; i < n; i++)
+ {
+ j=i+d;
+ if (j < n){
+ max=0;
+ index=n;
+ /*-------------------------------------
+ if index<n at end of for-loop, then this
+ means that index and j form a base pair,
+ and this is noted by numBasePairs[j][i]=index.
+ if index=n at end of for-loop, then this
+ means that j is not base paired.
+ -------------------------------------*/
+
+ if ( numBasePairs[i][j-1]>max ){
+ max = numBasePairs[i][j-1];
+ index = n;
+ // j not basepaired with some k such that i<k<j
+ }
+
+ val = basePair(S[i],S[j]) + numBasePairs[i+1][j-1];
+ if ( j-i<= THRESHOLD && val > max ){
+ max = val;
+ index=i;
+ }
+ for(k=i; k<=j-THRESHOLD; k++){
+ val = basePair(S[k],S[j]) + numBasePairs[i][k-1]
+ + numBasePairs[k+1][j-1];
+ if (val > max) {
+ max = val;
+ index=k;
+ }
+ }
+ numBasePairs[i][j]=max;
+ if (index<n)
+ numBasePairs[j][i]=index;
+ else
+ numBasePairs[j][i]=-1;
+ }
+ }
+ }
+
+}
+
+
+
+
+static int backtrack(int i, int j, int **numBasePairs,char *S, char *paren, int THRESHOLD)
+{
+ int k;
+
+ k = numBasePairs[j][i];
+ if(k != -1)
+ {
+ paren[k] = '(';
+ paren[j] = ')';
+ if( THRESHOLD <= (j-1)-(k+1) )
+ backtrack(k+1,j-1,numBasePairs,S,paren, THRESHOLD);
+ if (THRESHOLD <= k-1-i )
+ backtrack(i,k-1,numBasePairs,S,paren, THRESHOLD);
+ }
+ else{
+ if( THRESHOLD <= j-1-i )
+ {
+ backtrack(i,j-1,numBasePairs,S,paren, THRESHOLD);
+ }
+ else
+ return 0;
+ }
+ return 0;}
+
+int count;
+char * rna_struc2rna_lib ( char *seq_name, char *seq, char *name)
+{
+ FILE *fp;
+ char *st;
+
+
+ st=nussinov (seq, 2);
+ if ( name==NULL)name=vtmpnam(NULL);
+ fp=vfopen ( name, "w");
+ fprintf (fp, "! TC_LIB_FORMAT_01\n");
+ fprintf (fp, "1\n%s %d %s\n", seq_name, (int)strlen (seq), seq);
+ fprintf (fp, "#1 1\n");
+ display_rna_ss (0, seq, st, fp);
+ fprintf ( fp, "! SEQ_1_TO_N\n");
+ vfclose (fp);
+ vfree (st);
+ //printf_system ( "cp %s test", name);
+ return name;
+}
+int display_rna_ss ( int n, char *seq, char *st, FILE *fp)
+{
+ char p;
+ char string[100];
+ static int thread;
+
+ while ((p=st[n])!='\0')
+ {
+ if ( p=='(')
+ {
+ thread=count++;
+ sprintf (string, "%d",n+1);
+ n=display_rna_ss (n+1, seq, st, fp);
+ fprintf (fp, "%s %d 100\n", string, n+1);
+ }
+ else if (p=='.');
+ else if (p==')')
+ {
+ return n;
+ }
+ n++;
+ }
+ return n;
+}
+/******************************COPYRIGHT NOTICE*******************************/
+/*© Centro de Regulacio Genomica */
+/*and */
+/*Cedric Notredame */
+/*Fri Feb 18 08:27:45 CET 2011 - Revision 596. */
+/*All rights reserved.*/
+/*This file is part of T-COFFEE.*/
+/**/
+/* T-COFFEE is free software; you can redistribute it and/or modify*/
+/* it under the terms of the GNU General Public License as published by*/
+/* the Free Software Foundation; either version 2 of the License, or*/
+/* (at your option) any later version.*/
+/**/
+/* T-COFFEE is distributed in the hope that it will be useful,*/
+/* but WITHOUT ANY WARRANTY; without even the implied warranty of*/
+/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/
+/* GNU General Public License for more details.*/
+/**/
+/* You should have received a copy of the GNU General Public License*/
+/* along with Foobar; if not, write to the Free Software*/
+/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/
+/*............................................... |*/
+/* If you need some more information*/
+/* cedric.notredame@europe.com*/
+/*............................................... |*/
+/**/
+/**/
+/* */
+/******************************COPYRIGHT NOTICE*******************************/