8 #include "io_lib_header.h"
9 #include "util_lib_header.h"
10 #include "define_header.h"
11 #include "dp_lib_header.h"
12 void print_atom ( Atom*A);
14 float **** quantile_apdb_filtration ( Alignment *A, float ****residues, Constraint_list *CL,Pdb_param *PP, FILE *fp);
15 float **** irmsdmin_apdb_filtration ( Alignment *A, float ****residues, Constraint_list *CL,Pdb_param *PP, FILE *fp);
16 int apdb ( int argc, char *argv[])
19 Constraint_list *CL=NULL;
31 /*PARAMETERS VARIABLES*/
54 float maximum_distance;
55 float similarity_threshold;
67 char **out_aln_format;
77 char **template_file_list;
88 char *prot_blast_server;
89 char *pdb_blast_server;
96 argv=standard_initialisation (argv, &argc);
98 /*PARAMETER PROTOTYPE: READ PARAMETER FILE */
99 declare_name (parameters);
104 /*Name*/ "-parameters" ,\
107 /*OPTIONAL?*/ OPTIONAL ,\
109 /*DOC*/ "Read the files in the parameter file" ,\
110 /*Parameter*/ ¶meters ,\
113 /*Min_value*/ "any" ,\
114 /*Max Value*/ "any" \
116 if ( parameters && parameters[0])
118 argv[argc]=vcalloc ( VERY_LONG_STRING, sizeof(char));
120 fp_parameters=vfopen (parameters, "r");
121 while ((c=fgetc (fp_parameters))!=EOF)argv[1][a++]=c;
122 vfclose (fp_parameters);
125 argv=break_list ( argv, &argc, "=:;, \n");
127 /*PARAMETER PROTOTYPE*/
128 declare_name (se_name);
136 /*OPTIONAL?*/ OPTIONAL ,\
139 /*Parameter*/ &se_name ,\
140 /*Def 1*/ "stderr" ,\
141 /*Def 2*/ "/dev/null" ,\
142 /*Min_value*/ "any" ,\
143 /*Max Value*/ "any" \
146 le=vfopen ( se_name, "w");
147 fprintf ( le, "\nPROGRAM: %s\n",argv[0]);
149 /*PARAMETER PROTOTYPE: IN */
150 list_file=declare_char ( 200, STRING);
151 n_list=get_cl_param(\
158 /*OPTIONAL?*/ OPTIONAL ,\
161 /*Parameter*/ list_file ,\
164 /*Min_value*/ "any" ,\
165 /*Max Value*/ "any" \
167 /*PARAMETER PROTOTYPE: IN */
168 struc_to_use=declare_char ( 200, STRING);
169 n_struc_to_use=get_cl_param(\
173 /*Name*/ "-struc_to_use" ,\
176 /*OPTIONAL?*/ OPTIONAL ,\
179 /*Parameter*/ struc_to_use ,\
182 /*Min_value*/ "any" ,\
183 /*Max Value*/ "any" \
186 /*PARAMETER PROTOTYPE: COMPARISON IO */
187 declare_name (comparison_io);
192 /*Name*/ "-io_format" ,\
195 /*OPTIONAL?*/ OPTIONAL ,\
198 /*Parameter*/ &comparison_io,\
199 /*Def 1*/ "hsgd0123456",\
201 /*Min_value*/ "any" ,\
202 /*Max Value*/ "any" \
204 /*PARAMETER PROTOTYPE: ALN */
213 /*OPTIONAL?*/ OPTIONAL ,\
219 /*Min_value*/ "any" ,\
220 /*Max Value*/ "any" \
222 /*PARAMETER PROTOTYPE: ALN */
224 declare_name (repeat_seq);
229 /*Name*/ "-repeat_seq" ,\
232 /*OPTIONAL?*/ OPTIONAL ,\
235 /*Parameter*/ &repeat_seq,\
238 /*Min_value*/ "any" ,\
239 /*Max Value*/ "any" \
242 /*PARAMETER PROTOTYPE: ALN */
243 declare_name (repeat_pdb);
248 /*Name*/ "-repeat_pdb" ,\
251 /*OPTIONAL?*/ OPTIONAL ,\
254 /*Parameter*/ &repeat_pdb,\
257 /*Min_value*/ "any" ,\
258 /*Max Value*/ "any" \
261 /*PARAMETER PROTOTYPE: Nb to exclude */
266 /*Name*/ "-n_excluded_nb" ,\
269 /*OPTIONAL?*/ OPTIONAL ,\
271 /*DOC*/ "Exclude the N Nb on each side of the central residue. -1 triggers an automatic setting equal to the window size corresponding to the sphere" ,\
272 /*Parameter*/ &n_excluded_nb ,\
275 /*Min_value*/ "any" ,\
276 /*Max Value*/ "any" \
278 /*PARAMETER PROTOTYPE: diatances to count */
283 /*Name*/ "-similarity_threshold" ,\
286 /*OPTIONAL?*/ OPTIONAL ,\
289 /*Parameter*/ &similarity_threshold,\
292 /*Min_value*/ "any" ,\
293 /*Max Value*/ "any" \
295 /*PARAMETER PROTOTYPE: diatances to count */
300 /*Name*/ "-filter" ,\
303 /*OPTIONAL?*/ OPTIONAL ,\
305 /*DOC*/ "Filter by only keeping the best quantile" ,\
306 /*Parameter*/ &filter,\
309 /*Min_value*/ "-1.00" ,\
310 /*Max Value*/ "1.00" \
312 /*PARAMETER PROTOTYPE: diatances to count */
317 /*Name*/ "-filter_aln" ,\
320 /*OPTIONAL?*/ OPTIONAL ,\
322 /*DOC*/ "Lower Case For Residues Filtered Out" ,\
323 /*Parameter*/ &filter_aln,\
329 /*PARAMETER PROTOTYPE: diatances to count */
334 /*Name*/ "-irmsd_graph" ,\
337 /*OPTIONAL?*/ OPTIONAL ,\
339 /*DOC*/ "Outputs the irmsd, position/position" ,\
340 /*Parameter*/ &irmsd_graph,\
346 /*PARAMETER PROTOTYPE: diatances to count */
351 /*Name*/ "-nirmsd_graph" ,\
354 /*OPTIONAL?*/ OPTIONAL ,\
356 /*DOC*/ "Outputs the NIRMSD VS N Removed Residues Curve" ,\
357 /*Parameter*/ &nirmsd_graph,\
363 /*PARAMETER PROTOTYPE: -rmsd_threshold */
368 /*Name*/ "-md_threshold" ,\
371 /*OPTIONAL?*/ OPTIONAL ,\
374 /*Parameter*/ &md_threshold ,\
377 /*Min_value*/ "any" ,\
378 /*Max Value*/ "any" \
381 /*PARAMETER PROTOTYPE: -maximum distances */
386 /*Name*/ "-maximum_distance" ,\
389 /*OPTIONAL?*/ OPTIONAL ,\
392 /*Parameter*/ &maximum_distance ,\
395 /*Min_value*/ "any" ,\
396 /*Max Value*/ "any" \
400 /*PARAMETER PROTOTYPE: -print_rapdb */
405 /*Name*/ "-print_rapdb" ,\
408 /*OPTIONAL?*/ OPTIONAL ,\
410 /*DOC*/ "Prints the neighborhood of each pair of aligned residues, along with the associated local score" ,\
411 /*Parameter*/ &print_rapdb ,\
414 /*Min_value*/ "any" ,\
415 /*Max Value*/ "any" \
418 /*PARAMETER PROTOTYPE: RUN_NAME */
419 declare_name (run_name);
424 /*Name*/ "-run_name" ,\
427 /*OPTIONAL?*/ OPTIONAL ,\
430 /*Parameter*/ &run_name ,\
431 /*Def 1*/ "default" ,\
433 /*Min_value*/ "default" ,\
434 /*Max Value*/ "any" \
436 /*PARAMETER PROTOTYPE: OUTFILE */
437 /*PARAMETER PROTOTYPE: OUTFILE */
438 declare_name ( outfile);
443 /*Name*/ "-outfile" ,\
446 /*OPTIONAL?*/ OPTIONAL ,\
449 /*Parameter*/ &outfile ,\
451 /*Def 2*/ "default" ,\
452 /*Min_value*/ "default" ,\
453 /*Max Value*/ "any" \
455 /*PARAMETER PROTOTYPE: OUTFILE */
456 declare_name ( apdb_outfile);
461 /*Name*/ "-apdb_outfile" ,\
464 /*OPTIONAL?*/ OPTIONAL ,\
467 /*Parameter*/ &apdb_outfile ,\
468 /*Def 1*/ "stdout" ,\
469 /*Def 2*/ "default" ,\
470 /*Min_value*/ "any" ,\
471 /*Max Value*/ "any" \
474 /*PARAMETER PROTOTYPE: OUTPUT_FORMAT */
475 out_aln_format=declare_char ( 200, STRING);
476 n_out_aln_format=get_cl_param(\
480 /*Name*/ "-output" ,\
483 /*OPTIONAL?*/ OPTIONAL ,\
486 /*Parameter*/ out_aln_format,\
487 /*Def 1*/ "score_html" ,\
489 /*Min_value*/ "any" ,\
490 /*Max Value*/ "any" \
495 /*PARAMETER PROTOTYPE: INFILE */
496 declare_name (color_mode);
501 /*Name*/ "-color_mode" ,\
504 /*OPTIONAL?*/ OPTIONAL ,\
507 /*Parameter*/ &color_mode ,\
510 /*Min_value*/ "any" ,\
511 /*Max Value*/ "any" \
513 /*PARAMETER PROTOTYPE: INFILE */
514 declare_name (output_res_num);
519 /*Name*/ "-seqnos" ,\
522 /*OPTIONAL?*/ OPTIONAL ,\
525 /*Parameter*/ &output_res_num ,\
528 /*Min_value*/ "any" ,\
529 /*Max Value*/ "any" \
531 declare_name (cache);
539 /*OPTIONAL?*/ OPTIONAL ,\
541 /*DOC*/ "use,ignore,update,local, directory name" ,\
542 /*Parameter*/ &cache ,\
544 /*Def 2*/ "update" ,\
545 /*Min_value*/ "any" ,\
546 /*Max Value*/ "any" \
549 declare_name (local_mode);
554 /*Name*/ "-local_mode" ,\
557 /*OPTIONAL?*/ OPTIONAL ,\
559 /*DOC*/ "Mode for choosing the Neighborhood (bubble or window)\nWhen selecting window, maximum distance becomes the window 1/2 size, in residues\nWhen using sphere, maximum_distance is the sphere radius in Angstrom" ,\
560 /*Parameter*/ &local_mode ,\
561 /*Def 1*/ "sphere" ,\
562 /*Def 2*/ "window" ,\
563 /*Min_value*/ "any" ,\
564 /*Max Value*/ "any" \
567 /*PARAMETER PROTOTYPE: IN */
568 template_file_list=declare_char (100, STRING);
569 n_template_file=get_cl_param( \
573 /*Name*/ "-template_file" ,\
576 /*OPTIONAL?*/ OPTIONAL ,\
578 /*DOC*/ "List of templates file for the sequences",\
579 /*Parameter*/ template_file_list , \
580 /*Def 1*/ "_SELF_P_",\
582 /*Min_value*/ "any" ,\
583 /*Max Value*/ "any" \
585 /*PARAMETER PROTOTYPE: MODE */
594 /*OPTIONAL?*/ OPTIONAL ,\
596 /*DOC*/ "Mode: irmsd, ",\
597 /*Parameter*/ &mode , \
600 /*Min_value*/ "any" ,\
601 /*Max Value*/ "any" \
610 /*Name*/ "-prot_min_sim" ,\
611 /*Flag*/ &prot_min_sim ,\
613 /*OPTIONAL?*/ OPTIONAL ,\
615 /*DOC*/ "Minimum similarity between a sequence and its PDB target" ,\
616 /*Parameter*/ &prot_min_sim ,\
619 /*Min_value*/ "any" ,\
620 /*Max Value*/ "any" \
622 set_int_variable ("prot_min_sim", prot_min_sim);
628 /*Name*/ "-prot_max_sim" ,\
629 /*Flag*/ &prot_max_sim ,\
631 /*OPTIONAL?*/ OPTIONAL ,\
633 /*DOC*/ "Maximum similarity between a sequence and its BLAST relatives" ,\
634 /*Parameter*/ &prot_max_sim ,\
637 /*Min_value*/ "any" ,\
638 /*Max Value*/ "any" \
640 set_int_variable ("prot_max_sim", prot_max_sim);
646 /*Name*/ "-prot_min_cov" ,\
647 /*Flag*/ &prot_min_cov ,\
649 /*OPTIONAL?*/ OPTIONAL ,\
651 /*DOC*/ "Minimum coverage of a sequence by its BLAST relatives" ,\
652 /*Parameter*/ &prot_min_cov ,\
655 /*Min_value*/ "any" ,\
656 /*Max Value*/ "any" \
658 set_int_variable ("prot_min_cov", prot_min_cov);
664 /*Name*/ "-pdb_min_sim" ,\
665 /*Flag*/ &pdb_min_sim ,\
667 /*OPTIONAL?*/ OPTIONAL ,\
669 /*DOC*/ "Minimum similarity between a sequence and its PDB target" ,\
670 /*Parameter*/ &pdb_min_sim ,\
673 /*Min_value*/ "any" ,\
674 /*Max Value*/ "any" \
677 set_int_variable ("pdb_min_sim", pdb_min_sim);
682 /*Name*/ "-pdb_max_sim" ,\
683 /*Flag*/ &pdb_max_sim ,\
685 /*OPTIONAL?*/ OPTIONAL ,\
687 /*DOC*/ "Maximum similarity between a sequence and its PDB target" ,\
688 /*Parameter*/ &pdb_max_sim ,\
691 /*Min_value*/ "any" ,\
692 /*Max Value*/ "any" \
694 set_int_variable ("pdb_max_sim", pdb_max_sim);
699 /*Name*/ "-pdb_min_cov" ,\
700 /*Flag*/ &pdb_min_cov ,\
702 /*OPTIONAL?*/ OPTIONAL ,\
704 /*DOC*/ "Minimum coverage of a sequence by its PDB target" ,\
705 /*Parameter*/ &pdb_min_cov ,\
708 /*Min_value*/ "any" ,\
709 /*Max Value*/ "any" \
711 set_int_variable ("pdb_min_cov", pdb_min_cov);
715 declare_name (pdb_blast_server);
720 /*Name*/ "-pdb_blast_server" ,\
723 /*OPTIONAL?*/ OPTIONAL ,\
726 /*Parameter*/&pdb_blast_server ,\
728 /*Def 2*/ "default" ,\
729 /*Min_value*/ "any" ,\
730 /*Max Value*/ "any" \
732 declare_name (prot_blast_server);
740 /*OPTIONAL?*/ OPTIONAL ,\
743 /*Parameter*/&prot_blast_server ,\
746 /*Min_value*/ "any" ,\
747 /*Max Value*/ "any" \
749 //make sure that -blast and -blast_server are both supported blast>blast_server
750 if ( !prot_blast_server[0])
756 /*Name*/ "-blast_server" ,\
759 /*OPTIONAL?*/ OPTIONAL ,\
762 /*Parameter*/&prot_blast_server ,\
764 /*Def 2*/ "default" ,\
765 /*Min_value*/ "any" ,\
766 /*Max Value*/ "any" \
769 set_string_variable ("blast_server", prot_blast_server);
773 declare_name (pdb_db);
778 /*Name*/ "-pdb_db" ,\
781 /*OPTIONAL?*/ OPTIONAL ,\
783 /*DOC*/ "Non Redundant PDB database" ,\
784 /*Parameter*/&pdb_db ,\
786 /*Def 2*/ "default" ,\
787 /*Min_value*/ "any" ,\
788 /*Max Value*/ "any" \
790 set_string_variable ("pdb_db", pdb_db);
793 declare_name (prot_db);
798 /*Name*/ "-protein_db" ,\
801 /*OPTIONAL?*/ OPTIONAL ,\
804 /*Parameter*/&prot_db ,\
805 /*Def 1*/ "uniprot" ,\
806 /*Def 2*/ "default" ,\
807 /*Min_value*/ "any" ,\
808 /*Max Value*/ "any" \
815 /*Name*/ "-gapped" ,\
818 /*OPTIONAL?*/ OPTIONAL ,\
821 /*Parameter*/&gapped ,\
824 /*Min_value*/ "any" ,\
825 /*Max Value*/ "any" \
831 /*Name*/ "-min_ncol" ,\
834 /*OPTIONAL?*/ OPTIONAL ,\
836 /*DOC*/ "minimum number of columns (negative: fraction)" ,\
837 /*Parameter*/&min_ncol ,\
840 /*Min_value*/ "any" ,\
841 /*Max Value*/ "any" \
843 // set the correct mode:
844 if ( strm (argv[0], "trmsd"))sprintf (mode, "trmsd");
846 set_string_variable ("prot_db", prot_db);
849 if (argc==1){myexit (EXIT_SUCCESS);}
851 if ( strm (outfile,"no"))n_out_aln_format=0;
853 get_cl_param( argc, argv,&le, NULL,NULL,NULL,0,0,NULL);
854 prepare_cache (cache);
858 sprintf ( aln, "%s", argv[1]);
862 printf_exit (EXIT_FAILURE, stderr, "\n\n---- ERROR: File %s must be a valid alignment [FATAL:%s-%s]\n\n",aln,argv[0], PROGRAM);
865 pdb_param=vcalloc ( 1, sizeof(Pdb_param));
867 pdb_param->similarity_threshold=similarity_threshold;
869 pdb_param->md_threshold=md_threshold;
870 pdb_param->maximum_distance=maximum_distance;
872 if ( n_excluded_nb>0)
873 pdb_param->n_excluded_nb=n_excluded_nb;
874 else if ( n_excluded_nb==-1)
875 pdb_param->n_excluded_nb=(int)((float)maximum_distance/(float)1.57);
876 /* Exclude all the nb within the bubble at +1, +2, +n*/
877 pdb_param->print_rapdb=print_rapdb;
878 pdb_param->comparison_io=comparison_io;
880 pdb_param->local_mode=local_mode;
881 pdb_param->color_mode=lower_string (color_mode);
882 pdb_param->filter=filter;
883 pdb_param->filter_aln=filter_aln;
884 pdb_param->irmsd_graph=irmsd_graph;
885 pdb_param->nirmsd_graph=nirmsd_graph;
887 sprintf ( list_file[n_list++], "S%s", aln);
890 if (!strm (repeat_seq, ""))
893 sprintf ( template_file_list[0], "%s", process_repeat (list_file[0], repeat_seq, repeat_pdb));
894 fprintf ( le, "\n##Turn a repeat List into a Template File\n");
895 le=display_file_content (le,template_file_list[0]);
896 fprintf ( le, "\n\n");
898 S=read_seq_in_n_list (list_file, n_list, NULL, NULL);
900 le=display_sequences_names ( S,le,0, 0);
902 if ( n_template_file)
904 fprintf ( le, "\nLooking For Sequence Templates:\n");
905 for ( a=0; a< n_template_file; a++)
907 fprintf ( le, "\n\tTemplate Type: [%s] Mode Or File: [%s] [Start", template_type2type_name(template_file_list[a]), template_file_list[a]);
908 S=seq2template_seq(S, template_file_list[a], F);
913 if ( !strm (run_name, "default"))
915 F=parse_fname(run_name);
916 sprintf (F->name, "%s", F->full);
923 for ( a=0; a< S->nseq; a++)
927 p=seq2T_value (S, a, "template_file", "_P_");
929 if (p)sprintf (S->file[a], "%s",p);
932 CL=declare_constraint_list ( S,NULL, NULL, 0,NULL, NULL);
933 CL->T=vcalloc (S->nseq,sizeof (Ca_trace*));
936 for ( n_pdb=0,a=0; a<S->nseq; a++)
938 if ( !is_pdb_file ( S->file[a])){CL->T[a]=NULL;continue;}
939 CL->T[a]=read_ca_trace (S->file[a], "ATOM");
940 CL->T[a]=trim_ca_trace (CL->T[a], S->seq[a]);
941 (CL->T[a])->pdb_param=pdb_param;
948 A->residue_case=KEEP_CASE;
949 A=main_read_aln(aln, A);
953 if ( strm (apdb_outfile, "default"))
954 sprintf ( apdb_outfile, "%s.apdb_result", F->name);
959 fp=vfopen (apdb_outfile, "w");
960 fprintf (fp, "\nYour Alignment Does Not Contain Enough Sequences With a known Structure\n");
961 fprintf (fp, "To Use APDB, your alignment must include at least TWO sequences with a known structure.\n");
962 fprintf (fp, "These sequences must be named according to their PDB identifier, followed by the chain index (if any) ex: 1fnkA\n");
963 fprintf (fp, "[FATAL:%s]\n", PROGRAM);
966 else if ( strm (mode, "irmsd"))
968 EA=analyse_pdb ( A, EA, apdb_outfile);
970 else if ( strm (mode, "msa2tree") || strm (mode, "trmsd"))
972 EA=msa2struc_dist ( A, EA,F->name, gapped, min_ncol);
974 le=display_output_filename ( le, "APDB_RESULT", "APDB_RESULT_FORMAT_01", apdb_outfile, CHECK);
978 declare_name (file_name);
979 for ( a=0; a< n_out_aln_format; a++)
981 if ( strm2( outfile, "stdout", "stderr"))sprintf (file_name, "%s", outfile);
982 else if ( strm (outfile, "default"))
983 sprintf (file_name, "%s.%s",F->name, out_aln_format[a]);
985 sprintf (file_name, "%s.%s",outfile,out_aln_format[a]);
987 output_format_aln (out_aln_format[a],A,EA,file_name);
988 le=display_output_filename ( le, "MSA", out_aln_format[a], file_name, CHECK);
/*
 * set_constraint_list4align_pdb: build the pairwise constraint list (PWCL)
 * used to align a sequence on the basis of its PDB structure.
 *
 * Parameters:
 *   CL          source constraint list; its sequence set CL->S is reused.
 *   seq         index of the sequence whose Ca trace is loaded into PWCL->T.
 *   dp_mode     name of the dynamic-programming mode copied into PWCL.
 *   local_mode  name of the residue-pair scoring function to install.
 *   param_file  optional parameter file overriding the built-in defaults.
 *
 * Visible behaviour:
 *   - PWCL and pdb_param are function-level statics. free_constraint_list is
 *     called on PWCL (its guarding condition is not visible here) before a
 *     new list is declared over CL->S.
 *   - pdb_param starts from built-in defaults (max_delta 2.0,
 *     maximum_distance 14, scale 50, local_mode copied from the argument).
 *   - When param_file is non-NULL and exists, -nca, -max_delta,
 *     -maximum_distance, -local_mode, -scale, -gapopen and -gapext are read
 *     from it with get_parameter and override those defaults.
 *   - dp_mode is copied into PWCL->dp_mode; the short names "lalign" and "sw"
 *     are expanded to "sim_pair_wise_lalign" and "gotoh_pair_wise_sw".
 *   - local_mode is then re-read from pdb_param (so a -local_mode found in
 *     param_file wins) and mapped to PWCL->evaluate_residue_pair. The last
 *     visible statements of the chain print an "unknown hasch mode" message
 *     and call myexit(EXIT_FAILURE).
 *   - The Ca trace of sequence `seq` is read from
 *     is_pdb_struc((CL->S)->name[seq]), trimmed and hashed.
 *
 * NOTE(review): matrix_for_aa_group is written on CL, not on PWCL -- confirm
 *   this is intended.
 * NOTE(review): -maximum_distance, -scale and -gapopen are parsed with atoi
 *   while -max_delta and -gapext use atof; confirm the field types.
 * NOTE(review): the result of read_ca_trace is dereferenced without a
 *   visible NULL check.
 * NOTE(review): the return statement is not visible here.
 */
996 Constraint_list * set_constraint_list4align_pdb (Constraint_list *CL,int seq, char *dp_mode, char *local_mode, char *param_file)
998 static Constraint_list *PWCL;
999 static Pdb_param *pdb_param;
1005 free_constraint_list (PWCL);
1010 PWCL=declare_constraint_list ( CL->S,NULL, NULL, 0,NULL, NULL);
1012 pdb_param=vcalloc ( 1, sizeof(Pdb_param));
1014 pdb_param->max_delta=2.0;
1015 pdb_param->maximum_distance=14;
1016 declare_name (pdb_param->local_mode);
1017 sprintf (pdb_param->local_mode, "%s", local_mode);
1018 pdb_param->scale=50;
1020 PWCL->pw_parameters_set=1;
1022 PWCL->lalign_n_top=10;
1023 PWCL->sw_min_dist=10;
1025 PWCL->T=vcalloc ( (PWCL->S)->nseq, sizeof (Ca_trace*));
1032 sprintf (CL->matrix_for_aa_group, "vasiliky");
1033 PWCL->use_fragments=0;
1039 if ( param_file && check_file_exists ( param_file) )
1041 if ( (x=get_parameter ( "-nca", &n, param_file))!=NULL){pdb_param->N_ca=atoi(x[0]);free_char (x, -1);}
1042 if ( (x=get_parameter ( "-max_delta", &n, param_file))!=NULL){pdb_param->max_delta=atof(x[0]);free_char (x, -1);}
1043 if ( (x=get_parameter ( "-maximum_distance", &n, param_file))!=NULL){pdb_param->maximum_distance=atoi(x[0]);free_char (x, -1);}
1044 if ( (x=get_parameter ( "-local_mode", &n, param_file))!=NULL){sprintf (pdb_param->local_mode, "%s",x[0]);free_char (x, -1);}
1045 if ( (x=get_parameter ( "-scale", &n, param_file))!=NULL){pdb_param->scale=atoi(x[0]);free_char (x, -1);}
1046 if ( (x=get_parameter ( "-gapopen", &n, param_file))!=NULL){PWCL->gop=atoi(x[0]);free_char (x, -1);}
1047 if ( (x=get_parameter ( "-gapext" , &n, param_file))!=NULL){PWCL->gep=atof(x[0]);free_char (x, -1);}
1054 sprintf ( PWCL->dp_mode, "%s", dp_mode);
1056 if (strm (PWCL->dp_mode, "lalign"))sprintf (PWCL->dp_mode,"sim_pair_wise_lalign");
1057 else if (strm (PWCL->dp_mode, "sw"))sprintf (PWCL->dp_mode,"gotoh_pair_wise_sw");
1059 local_mode=pdb_param->local_mode;
1060 if ( strm ( local_mode, "hasch_ca_trace_nb")) PWCL->evaluate_residue_pair=evaluate_ca_trace_nb;
1061 else if ( strm ( local_mode, "hasch_ca_trace_bubble")) PWCL->evaluate_residue_pair=evaluate_ca_trace_bubble;
1062 else if ( strm ( local_mode, "hasch_ca_trace_sap1_bubble")) PWCL->evaluate_residue_pair=evaluate_ca_trace_sap1_bubble;
1063 else if ( strm ( local_mode, "hasch_ca_trace_sap2_bubble")) PWCL->evaluate_residue_pair=evaluate_ca_trace_sap2_bubble;
1065 else if ( strm ( local_mode, "hasch_ca_trace_transversal")) PWCL->evaluate_residue_pair=evaluate_ca_trace_transversal;
1066 else if ( strm ( local_mode, "hasch_ca_trace_bubble_2")) PWCL->evaluate_residue_pair=evaluate_ca_trace_bubble_2;
1067 else if ( strm ( local_mode, "hasch_ca_trace_bubble_3")) PWCL->evaluate_residue_pair=evaluate_ca_trace_bubble_3;
1068 else if ( strm ( local_mode, "custom_pair_score_function1")) PWCL->evaluate_residue_pair=custom_pair_score_function1;
1069 else if ( strm ( local_mode, "custom_pair_score_function2")) PWCL->evaluate_residue_pair=custom_pair_score_function2;
1070 else if ( strm ( local_mode, "custom_pair_score_function3")) PWCL->evaluate_residue_pair=custom_pair_score_function3;
1071 else if ( strm ( local_mode, "custom_pair_score_function4")) PWCL->evaluate_residue_pair=custom_pair_score_function4;
1072 else if ( strm ( local_mode, "custom_pair_score_function5")) PWCL->evaluate_residue_pair=custom_pair_score_function5;
1073 else if ( strm ( local_mode, "custom_pair_score_function6")) PWCL->evaluate_residue_pair=custom_pair_score_function6;
1074 else if ( strm ( local_mode, "custom_pair_score_function7")) PWCL->evaluate_residue_pair=custom_pair_score_function7;
1075 else if ( strm ( local_mode, "custom_pair_score_function8")) PWCL->evaluate_residue_pair=custom_pair_score_function8;
1076 else if ( strm ( local_mode, "custom_pair_score_function9")) PWCL->evaluate_residue_pair=custom_pair_score_function9;
1077 else if ( strm ( local_mode, "custom_pair_score_function10")) PWCL->evaluate_residue_pair=custom_pair_score_function10;
1082 fprintf ( stderr, "\n%s is an unknown hasch mode, [FATAL]\n", local_mode);
1083 myexit (EXIT_FAILURE);
1089 PWCL->T[seq]=read_ca_trace (is_pdb_struc((CL->S)->name[seq]), "ATOM");
1090 (PWCL->T[seq])->pdb_param=pdb_param;
1091 PWCL->T[seq]=trim_ca_trace (PWCL->T[seq], (CL->S)->seq[seq]);
1092 PWCL->T[seq]=hasch_ca_trace(PWCL->T[seq]);
/*
 * evaluate_ca_trace_nb: score the pairing of residue r1 of sequence s1 with
 * residue r2 of sequence s2 from their Chain neighbour tables.
 *
 * Returns neighborhood_match() applied to (CL->T[s1])->Chain and
 * (CL->T[s2])->Chain, truncated from float to int.
 * Both CL->T[s1] and CL->T[s2] are dereferenced without a NULL check.
 */
1102 int evaluate_ca_trace_nb (Constraint_list *CL, int s1, int r1, int s2, int r2)
1105 return (int)(neighborhood_match(CL, s1,r1, s2, r2, (CL->T[s1])->Chain,(CL->T[s2])->Chain ));
/*
 * evaluate_ca_trace_sap2_bubble: score the pairing of residue r1 of s1 with
 * residue r2 of s2 using sap2_neighborhood_match() on the Bubble neighbour
 * tables of both Ca traces.
 *
 * sap2_neighborhood_match() returns a float; the value is converted to int
 * implicitly by the return statement.
 */
1107 int evaluate_ca_trace_sap2_bubble (Constraint_list *CL, int s1, int r1, int s2, int r2)
1112 return sap2_neighborhood_match (CL, s1, r1, s2, r2, (CL->T[s1])->Bubble,(CL->T[s2])->Bubble );
/*
 * evaluate_ca_trace_sap1_bubble: score the pairing of residue r1 of s1 with
 * residue r2 of s2 from their Bubble neighbour tables.
 *
 * The visible code computes a1 as the int-truncated result of
 * sap1_neighborhood_match() on (CL->T[s1])->Bubble and (CL->T[s2])->Bubble.
 * How a1 is combined into the return value is not visible here.
 *
 * NOTE(review): the embedded description lists the arguments as
 *   (s1, s2, r1, r2) whereas the signature is (s1, r1, s2, r2).
 */
1115 int evaluate_ca_trace_sap1_bubble (Constraint_list *CL, int s1, int r1, int s2, int r2)
1118 Function documentation: start
1120 int evaluate_ca_trace_sap1_bubble (Constraint_list *CL, int s1, int s2, int r1, int r2)
1121 This function evaluates the cost for matching two residues:
1123 a1 is the cost for matching the two neighborood ( bubble type), using sap
1124 a1: [0,+100], +100 is the best possible match.
1125 a2 is the residue type weight:
1126 min=worst substitution value
1127 best=best of r1/r1, r2/r2-min
1129 a2=(r1/r2 -min)/best --> a1:[0, 100]
1131 score=a1*a2-->[-inf, +10000];
1139 a1=(int) sap1_neighborhood_match (CL, s1, r1, s2, r2, (CL->T[s1])->Bubble,(CL->T[s2])->Bubble );
/*
 * evaluate_ca_trace_bubble: score the pairing of residue r1 of s1 with
 * residue r2 of s2 from their Bubble neighbour tables, shifted by the
 * configured scale.
 *
 * The visible code computes
 *   a1 = (int) neighborhood_match(... Bubble, Bubble) - pdb_param->scale
 * where the cast binds to the call result only, so the float score is
 * truncated first and the scale of sequence s1's Pdb_param is subtracted
 * afterwards. The return statement is not visible here.
 *
 * NOTE(review): the embedded description lists the arguments as
 *   (s1, s2, r1, r2) whereas the signature is (s1, r1, s2, r2).
 */
1145 int evaluate_ca_trace_bubble (Constraint_list *CL, int s1, int r1, int s2, int r2)
1148 Function documentation: start
1150 int evaluate_ca_trace_bubble (Constraint_list *CL, int s1, int s2, int r1, int r2)
1151 This function evaluates the cost for matching two residues:
1153 a1 is the cost for matching the two neighborood ( bubble type)
1154 a1: [-inf,+100-scale], +100-scale is the best possible match.
1164 a1=(int) neighborhood_match (CL, s1, r1, s2, r2, (CL->T[s1])->Bubble,(CL->T[s2])->Bubble )-((CL->T[s1])->pdb_param)->scale;
/*
 * evaluate_ca_trace_transversal: score the pairing of residue r1 of s1 with
 * residue r2 of s2 from their Transversal neighbour tables.
 *
 * Returns transversal_match() applied to (CL->T[s1])->Transversal and
 * (CL->T[s2])->Transversal, truncated from float to int.
 */
1170 int evaluate_ca_trace_transversal (Constraint_list *CL, int s1, int r1, int s2, int r2)
1172 return (int)(transversal_match (CL, s1, r1, s2, r2, (CL->T[s1])->Transversal,(CL->T[s2])->Transversal ));
/*
 * evaluate_ca_trace_bubble_3: combine a Bubble-neighbourhood score and a
 * Transversal score for the pairing of residue r1 of s1 with r2 of s2.
 *
 * Visible behaviour:
 *   - l1 is the larger of the two Chain  nb[..][0] entries,
 *     l2 is the larger of the two Bubble nb[..][0] entries
 *     (nb[r][0] presumably holds the neighbour count -- TODO confirm);
 *   - a1 = (int) neighborhood_match() on the Bubble tables;
 *   - a2 = (int) transversal_match()  on the Transversal tables;
 *   - returns 0 when both l1 and l2 are zero.
 * How a1 and a2 are combined otherwise is not visible here.
 *
 * NOTE(review): r1 and r2 index nb[] directly here, whereas
 *   neighborhood_match() in this file decrements both before indexing;
 *   confirm which convention the callers use.
 */
1175 int evaluate_ca_trace_bubble_3 (Constraint_list *CL, int s1, int r1, int s2, int r2)
1177 /*This Mode evaluates :
1180 2-The Match of the transversal residues
1187 l1=MAX(( (CL->T[s1])->Chain )->nb[r1][0] ,((CL->T[s2])->Chain )->nb[r2][0]);
1188 l2=MAX(( (CL->T[s1])->Bubble)->nb[r1][0], ((CL->T[s2])->Bubble)->nb[r2][0]);
1190 a1=(int)(neighborhood_match (CL, s1, r1, s2, r2, (CL->T[s1])->Bubble,(CL->T[s2])->Bubble ));
1191 a2=(int)(transversal_match (CL, s1, r1, s2, r2, (CL->T[s1])->Transversal,(CL->T[s2])->Transversal ));
1193 if ( !l1 && !l2)return 0;
/*
 * evaluate_ca_trace_bubble_2: score the pairing of residue r1 of s1 with
 * residue r2 of s2 from their Chain neighbour tables.
 *
 * The visible return statement is neighborhood_match() on the Chain tables
 * of both Ca traces, truncated to int -- the same call made by
 * evaluate_ca_trace_nb(). Any earlier statements are not visible here.
 */
1197 int evaluate_ca_trace_bubble_2 (Constraint_list *CL, int s1, int r1, int s2, int r2)
1199 /*This Mode evaluates :
1200 1-The Ca neighborhood
1205 return (int)((neighborhood_match (CL, s1, r1, s2, r2, (CL->T[s1])->Chain,(CL->T[s2])->Chain )));
1209 /*********************************************************************************************/
1211 /* FUNCTIONS FOR COMPARING TWO NEIGHBORHOODS:START */
1213 /*********************************************************************************************/
/*
 * matrix_match: residue-type weight for pairing residue r1 of sequence s1
 * with residue r2 of sequence s2, derived from a substitution matrix.
 *
 * Visible behaviour:
 *   - CL->M is loaded with read_matrice("pam250mt") and `min` is lowered to
 *     the smallest entry of its 26x26 table (the guard around this
 *     initialisation is not visible here);
 *   - returns 0 when r1 or r2 is <= 0;
 *   - m1 and m2 are the self-substitution scores of the two residues minus
 *     min; a2 is the cross-substitution score minus min, divided by m
 *     (the computation of m from m1/m2 is not visible here).
 *   - Residues are 1-based: the character used is seq[s][r-1].
 *
 * nbs1 and nbs2 are not used by any visible statement.
 *
 * NOTE(review): matrix rows and columns are indexed with `residue - 'A'`;
 *   a lower-case or gap character would fall outside 0..25 -- confirm the
 *   callers only pass upper-case residues or that CL->M is larger than 26.
 */
1214 float matrix_match (Constraint_list *CL, int s1, int r1, int s2, int r2, Struct_nb *nbs1, Struct_nb *nbs2)
1218 Function documentation: start
1220 float matrix_match (Constraint_list *CL, int s1, int s2, int r1, int r2, Struct_nb *nbs1, Struct_nb *nbs2)
1221 This function evaluates the matrix for matching two residues:
1223 min=worst substitution value
1224 best=best of r1/r1, r2/r2-min
1226 a2=(r1/r2 -min)/best --> a1:[0, 100]
1228 score=a1*a2-->[-inf, +10000];
1240 CL->M=read_matrice ( "pam250mt");
1242 for ( a=0; a< 26; a++)
1243 for ( b=0; b< 26; b++)min=MIN(min, CL->M[a][b]);
1246 if ( r1<=0 || r2<=0)return 0;
1247 m1=CL->M[(CL->S)->seq[s1][r1-1]-'A'][(CL->S)->seq[s1][r1-1]-'A']-min;
1248 m2=CL->M[(CL->S)->seq[s2][r2-1]-'A'][(CL->S)->seq[s2][r2-1]-'A']-min;
1250 a2=(CL->M[(CL->S)->seq[s1][r1-1]-'A'][(CL->S)->seq[s2][r2-1]-'A']-min)/m;
/*
 * transversal_match: position-by-position comparison of two neighbour
 * distance lists (no insertions or deletions).
 *
 * Visible behaviour:
 *   - PP is the Pdb_param of sequence s1's Ca trace; max_delta comes from it;
 *   - returns 0 unless both lists have the same length l1==l2 and that
 *     length is at least PP->N_ca (the assignment of l1/l2 is not visible);
 *   - delta accumulates max_delta - |d_nb1[r1][a] - d_nb2[r2][a]| for a in
 *     [0, l2);
 *   - score = delta*100 / (MAX(l1,l2)*max_delta).
 *
 * NOTE(review): if l1==l2==0 passes the N_ca test, max is 0 and the final
 *   division is by zero.
 * NOTE(review): d_nb is indexed from 0 here while neighborhood_match() in
 *   this file indexes it from 1 -- confirm the table layout.
 */
1256 float transversal_match (Constraint_list *CL, int s1, int r1, int s2, int r2, Struct_nb *nbs1, Struct_nb *nbs2)
1260 float delta, max_delta;
1264 PP=(CL->T[s1])->pdb_param;
1265 max_delta=PP->max_delta;
1270 if ( l1!=l2 || l1<(PP->N_ca)) return 0;
1273 max=MAX(l1, l2)*max_delta;
1274 for ( delta=0,a=0; a< l2 ; a++)
1277 delta+=max_delta-FABS((nbs1->d_nb[r1][a]-nbs2->d_nb[r2][a]));
1279 score=(delta*100)/max;
/*
 * neighborhood_match: dynamic-programming comparison of the neighbour
 * distance lists of residue r1 (sequence s1) and residue r2 (sequence s2).
 *
 * Visible behaviour:
 *   - a function-static scratch table is kept across calls and re-declared
 *     whenever MAX(l1,l2)+1 exceeds the recorded table_size;
 *   - r1 and r2 are decremented when both are > 0 (1-based to 0-based);
 *   - max = MAX(l1,l2) * PP->max_delta, with PP taken from sequence s1;
 *     the function returns 0 when max is 0;
 *   - for a in [1,l1], b in [1,l2] the substitution gain is
 *     delta = max_delta - |d_nb1[r1][a] - d_nb2[r2][b]| and
 *     sub = table[a-1][b-1] + delta; the best of del / ins / sub is kept
 *     (the del/ins expressions and the table write are not visible here);
 *   - the final score is rescaled as score*100/max.
 *
 * NOTE(review): after free_float(table, -1) the pointer must be reset to
 *   NULL for the following `if (!table)` re-declaration to run; that reset
 *   is not visible here.
 * NOTE(review): when r1 or r2 is <= 0 no decrement happens; whether the
 *   function returns early in that case is not visible here.
 * NOTE(review): the static table makes the function non-reentrant.
 */
1286 float neighborhood_match (Constraint_list *CL, int s1, int r1, int s2, int r2, Struct_nb *nbs1, Struct_nb *nbs2)
1288 static float **table;
1289 static int table_size;
1292 float ins, del, sub;
1293 float delta, max_delta;
1298 PP=(CL->T[s1])->pdb_param;
1299 max_delta=PP->max_delta;
1302 if ( r1> 0 && r2 >0) {r1--; r2--;}
1308 if (table_size< (MAX(l1, l2)+1))
1310 table_size=MAX(l1, l2)+1;
1311 if ( table)free_float (table, -1);
1314 if ( !table) table=declare_float (table_size, table_size);
1317 max=MAX(l1, l2)*max_delta;
1318 if ( max==0)return 0;
1322 for ( b=1; b<=l2; b++)
1326 for ( a=1; a<=l1; a++)
1329 for ( b=1; b<=l2 ; b++)
1332 delta=max_delta-FABS((nbs1->d_nb[r1][a]-nbs2->d_nb[r2][b]));
1336 sub= table[a-1][b-1]+delta;
1338 if ( del >= ins && del >= sub)score=del;
1339 else if ( ins >= del && ins >= sub) score=ins;
1346 score=((((score)*100)/max));
/*
 * sap1_neighborhood_match: dynamic-programming comparison of two neighbour
 * distance lists using the gain A/(|d1-d2|+B), as described in the embedded
 * note (Taylor and Orengo, JMB 1989).
 *
 * Visible behaviour:
 *   - same scratch-table handling and r1/r2 decrement as
 *     neighborhood_match();
 *   - max = MAX(l1,l2) * (A/B); the function returns 0 when max is 0
 *     (the values of A and B are not visible here);
 *   - for a in [1,l1], b in [1,l2]:
 *     delta = A / (|d_nb1[r1][a] - d_nb2[r2][b]| + B),
 *     sub = table[a-1][b-1] + delta, and the best of del / ins / sub is kept;
 *   - the final score is rescaled as score*100/max.
 *
 * NOTE(review): the embedded description lists the arguments as
 *   (s1, s2, r1, r2) whereas the signature is (s1, r1, s2, r2).
 * NOTE(review): after free_float(table, -1) the pointer must be reset to
 *   NULL for the following `if (!table)` re-declaration to run; that reset
 *   is not visible here.
 */
1352 float sap1_neighborhood_match (Constraint_list *CL, int s1, int r1, int s2, int r2, Struct_nb *nbs1, Struct_nb *nbs2)
1355 Function documentation: start
1357 float sap1_neighborhood_match (Constraint_list *CL, int s1, int s2, int r1, int r2, Struct_nb *nbs1, Struct_nb *nbs2)
1358 This function is adapted from Taylor, Orengo, Protein Structure Alignment JMB 1989, (208)1-22
1359 It is the first function where
1360 score= A/(|dra-drb|+b)
1362 Function documentation: end
1365 static float **table;
1366 static int table_size;
1369 float ins, del, sub;
1381 if ( r1> 0 && r2 >0) {r1--; r2--;}
1387 if (table_size< (MAX(l1, l2)+1))
1389 table_size=MAX(l1, l2)+1;
1390 if ( table)free_float (table, -1);
1393 if ( !table) table=declare_float (table_size, table_size);
1396 max=MAX(l1, l2)*(A/B);
1397 if ( max==0)return 0;
1401 for ( b=1; b<=l2; b++)
1405 for ( a=1; a<=l1; a++)
1408 for ( b=1; b<=l2 ; b++)
1411 delta=A/(FABS((nbs1->d_nb[r1][a]-nbs2->d_nb[r2][b]))+B);
1415 sub= table[a-1][b-1]+delta;
1416 if ( del >= ins && del >= sub)score=del;
1417 else if ( ins >= del && ins >= sub) score=ins;
1424 score=((score*100))/(max);
/*
 * sap2_neighborhood_match: dynamic-programming comparison of the neighbours
 * of residue r1 (sequence s1) and residue r2 (sequence s2) after expressing
 * each neighbour in a local frame attached to its central residue.
 *
 * Visible behaviour:
 *   - r1 and r2 are decremented when both are > 0;
 *   - for each central residue three vectors are built relative to its CA:
 *     vX from C, vY from N and vZ from CB; when CB is absent vZ is
 *     add_atom(vX, vY) instead;
 *   - a block of fprintf/print_atom calls dumps the residue, its atoms and
 *     its frame to stdout (the guard around this block is not visible here);
 *   - every neighbour is copied, translated by the central CA and passed
 *     through reframe_atom(); the distance between the two reframed
 *     neighbours is sqrt(square_atom(difference));
 *   - sub = table[a-1][b-1] + delta and the best of del / ins / sub is kept
 *     (the definition of delta from that distance is not visible here);
 *   - the final score is score*100/max - 50, with max = MAX(l1,l2)*(A/B);
 *     the function returns 0 when max is 0.
 *
 * NOTE(review): the frame vectors and ca1/ca2 are function-static; when a
 *   residue has no C or no N, vX / vY keep the value left by a previous call.
 * NOTE(review): in the dump block only the fprintf is governed by each
 *   `if`; the print_atom call that follows on the same line runs
 *   unconditionally, including when the atom pointer is NULL.
 * NOTE(review): the dump block reads pep1[r1-1], which is index -1 when
 *   r1 is 0 after the decrement.
 * NOTE(review): the embedded description still names
 *   sap1_neighborhood_match and lists the arguments as (s1, s2, r1, r2).
 */
1430 float sap2_neighborhood_match (Constraint_list *CL, int s1, int r1, int s2, int r2, Struct_nb *nbs1, Struct_nb *nbs2)
1433 Function documentation: start
1435 float sap1_neighborhood_match (Constraint_list *CL, int s1, int s2, int r1, int r2, Struct_nb *nbs1, Struct_nb *nbs2)
1436 This function is adapted from Taylor, Orengo, Protein Structure Alignment JMB 1989, (208)1-22
1437 It is the first function where
1438 score= A/(|dra-drb|+b)
1440 Function documentation: end
1443 static float **table;
1444 static int table_size;
1447 float ins, del, sub;
1453 static Atom *vX_1, *vY_1, *vZ_1;
1454 static Atom *vX_2, *vY_2, *vZ_2;
1455 static Atom *ca1, *ca2;
1464 if ( r1> 0 && r2 >0) {r1--; r2--;}
1467 /*Make up the referencial*/
1468 pep1=(CL->T[s1])->peptide_chain;
1469 pep2=(CL->T[s2])->peptide_chain;
1471 /*Get Referencial for CA1*/
1472 if ( (pep1[r1])->C)vX_1 =diff_atom(pep1[r1]->C,pep1[r1]->CA, vX_1);
1473 if ( (pep1[r1])->N)vY_1 =diff_atom(pep1[r1]->N,pep1[r1]->CA, vY_1);
1474 if ( (pep1[r1])->CB)vZ_1=diff_atom(pep1[r1]->CB,(pep1[r1])->CA,vZ_1);
1475 else vZ_1=add_atom (vX_1, vY_1, vZ_1);
1481 /*Get Referencial for CA2*/
1482 if ( (pep2[r2])->C)vX_2 =diff_atom((pep2[r2])->C,(pep2[r2])->CA, vX_2);
1483 if ( (pep2[r2])->N)vY_2 =diff_atom((pep2[r2])->N,(pep2[r2])->CA, vY_2);
1484 if ( (pep2[r2])->CB)vZ_2=diff_atom((pep2[r2])->CB,(pep2[r2])->CA, vZ_2);
1485 else vZ_2=add_atom (vX_2, vY_2, vZ_2);
1490 /*END OF GETTING REFERENCIAL*/
1495 fprintf (stdout,"\n\t*******");
1497 fprintf (stdout, "RESIDUE %d %c", r1, (CL->S)->seq[s1][r1]);
1498 if ( (pep1[r1])->CA)fprintf (stdout,"\n\tCA ");print_atom (pep1[r1]->CA );
1499 if ( (pep1[r1])->C)fprintf (stdout,"\n\tC ");print_atom (pep1[r1]->C );
1500 if ( (pep1[r1])->N)fprintf (stdout,"\n\tN ");print_atom (pep1[r1]->N );
1501 if ( (pep1[r1])->CB)fprintf (stdout,"\n\tCB ");print_atom (pep1[r1]->CB );
1502 fprintf (stdout,"\n\t*******");
1503 fprintf (stdout,"\n\tvX ");print_atom ( vX_1);
1504 fprintf (stdout,"\n\tvY ");print_atom ( vY_1);
1505 fprintf (stdout,"\n\tvZ ");print_atom ( vZ_1);
1507 ca1= copy_atom ((pep1[r1-1])->CA, ca1);
1508 ca1 =diff_atom(ca1,(pep1[r1])->CA, ca1);
1509 fprintf (stdout,"\n\tca ");print_atom ( ca1);
1510 fprintf ( stdout, "\n\tSQ1=%d ", (int)square_atom(ca1));
1511 ca1=reframe_atom(vX_1, vY_1, vZ_1, ca1, ca1);
1512 fprintf ( stdout, "\n\tSQ2=%d ", (int)square_atom(ca1));
1513 fprintf (stdout,"\n\tca ");print_atom ( ca1);
1514 fprintf (stdout,"\n\n");
1521 if (table_size< (MAX(l1, l2)+1))
1523 table_size=MAX(l1, l2)+1;
1524 if ( table)free_float (table, -1);
1527 if ( !table) table=declare_float (table_size, table_size);
1530 max=MAX(l1, l2)*(A/B);
1532 if ( max==0)return 0;
1536 for ( b=1; b<=l2; b++)
1541 for ( a=1; a<=l1; a++)
1543 ca1=copy_atom ((CL->T[s1])->structure[nbs1->nb[r1][a]], ca1);
1544 ca1=diff_atom(ca1,(pep1[r1])->CA, ca1);
1545 ca1=reframe_atom(vX_1, vY_1, vZ_1, ca1, ca1);
1548 for ( b=1; b<=l2 ; b++)
1550 ca2 =copy_atom((CL->T[s2])->structure[nbs2->nb[r2][b]], ca2);
1551 ca2 =diff_atom(ca2,(pep2[r2])->CA, ca2);
1552 ca2 =reframe_atom(vX_2, vY_2, vZ_2, ca2, ca2);
1554 ca2=diff_atom(ca2,ca1,ca2);
1555 val=square_atom (ca2);
1557 val=(float)sqrt ((double)val);
1564 sub= table[a-1][b-1]+delta;
1566 if ( del >= ins && del >= sub)score=del;
1567 else if ( ins >= del && ins >= sub) score=ins;
1574 score=(((score*100))/(max)-50);
1580 /*********************************************************************************************/
1584 /*********************************************************************************************/
/*
 * irmsdmin_apdb_filtration: for every ordered pair of aligned sequences
 * that both have a Ca trace, discard the residue pairs whose removal
 * lowers the normalised iRMSD, by clearing their entries in `residues`.
 *
 * Parameters:
 *   A         the alignment; lower-case columns are ignored.
 *   residues  residues[s1][s2][pos][0..4] per-residue measures; field [4]
 *             is the value ranked here, field [0]==0 marks an entry that is
 *             skipped.
 *   CL        constraint list providing the Ca traces (CL->T).
 *   PP        options; nirmsd_graph and print_rapdb enable stdout traces.
 *   fp        not used by any visible statement (output goes to stdout).
 *
 * Visible behaviour:
 *   - normalized_len[s1][s2] is the smaller of the two counts of non-gap,
 *     upper-case residues in the aligned sequences;
 *   - per pair, list[n] = { residue index, 100000 * residues[..][4] } is
 *     filled for every usable column and sorted with sort_int_inv;
 *   - nirmsd is evaluated as sum / (remaining * remaining) while entries
 *     are removed from the top of the list (the update of sum and the
 *     choice of the cut-off t are not fully visible here);
 *   - entries list[0..t] get fields [0..3] set to 0 and field [4] to -1.
 *
 * NOTE(review): sum2 is accumulated but no initialisation of it is
 *   visible; confirm it is set to 0 somewhere.
 * NOTE(review): `if (n==0) return residues;` leaves the function from
 *   inside the pair loops: the remaining pairs are not processed and the
 *   visible free_int(list) / free_float(normalized_len) calls are skipped.
 * NOTE(review): the clearing loop runs for a <= t (t+1 entries) while the
 *   message printed just before reports t removed columns.
 * NOTE(review): in `(int)100000*x` the cast binds to the literal, so the
 *   product is computed in floating point and truncated on assignment.
 */
1585 float **** irmsdmin_apdb_filtration ( Alignment *A, float ****residues, Constraint_list *CL, Pdb_param *PP, FILE *fp)
1587 int s1, s2, a,col1, n,n2=0, t,flag;
1589 float nirmsd, min_nirmsd,max_nirmsd,ref_sum, sum, sum2;
1590 float **normalized_len;
1592 normalized_len=declare_float (A->nseq+1, A->nseq+1);
1593 for (s1=0; s1<A->nseq; s1++)
1595 int l1, l2, r1, r2, p;
1596 for (s2=0; s2<A->nseq; s2++)
1598 for ( l1=l2=p=0; p< A->len_aln; p++)
1600 r1=A->seq_al[s1][p];
1601 r2=A->seq_al[s2][p];
1602 if (!is_gap(r1) && isupper(r1))l1++;
1603 if (!is_gap(r2) && isupper(r2))l2++;
1605 normalized_len[s1][s2]=MIN(l1,l2);
1609 pos=aln2pos_simple (A, A->nseq);
1610 for ( s1=0; s1< A->nseq; s1++)
1611 for ( s2=0; s2<A->nseq; s2++)
1613 if ( s1==s2) continue;
1614 else if (!(CL->T[A->order[s1][0]]) || !(CL->T[A->order[s2][0]]))continue;
1616 list=declare_int (A->len_aln, 2);
1618 for ( sum=0,n=0,col1=0; col1< A->len_aln; col1++)
1620 if ( islower (A->seq_al[s1][col1]) || islower ( A->seq_al[s2][col1]))continue;
1621 else if ( pos[s1][col1]<=0 || pos[s2][col1]<=0 ) continue;
1622 else if ( residues[s1][s2][pos[s1][col1]-1][0]==0)continue;
1624 list[n][0]=pos[s1][col1]-1;
1625 list[n][1]=(int)100000*residues[s1][s2][pos[s1][col1]-1][4];
1626 sum2+=residues[s1][s2][pos[s1][col1]-1][4];
1630 if (n==0)return residues;
1632 sort_int_inv (list, 2, 1,0, n-1);
1633 for (sum=0,a=0; a<n; a++)
1638 nirmsd=min_nirmsd=max_nirmsd=sum/(n*n);
1642 /*1 Find the maximum*/
1644 for (flag=0,a=0; a< n-1; a++)
1647 nirmsd=sum/((n-(a+1))*(n-(a+1)));
1648 if (nirmsd<max_nirmsd)flag=1;
1649 if ((nirmsd>=max_nirmsd) && flag==1)break;
1654 for (a=0; a<n2-1; a++)
1657 nirmsd=sum/((n-(a+1))*(n-(a+1)));
1660 if ( nirmsd<min_nirmsd)
1664 if ( PP->nirmsd_graph)
1666 fprintf ( stdout, "\n_NIRMSD_GRAPH %s %s POS: %4d Removed: %4d NiRMSD: %.2f", A->name[s1], A->name[s2], list[a][0],a,(nirmsd/100000)*normalized_len[s1][s2]);
1671 if ( PP->print_rapdb)
1673 for ( a=0; a<n; a++)
1675 if ( list[a][1]>0 && a<=t)fprintf ( stdout, "\nRAPDB QUANTILE REMOVE S1: %3d S2: %3d COL: %3d SCORE*100: %d", s1, s2, list[a][0], list[a][1]);
1676 else if ( list[a][1]>0 && a>t)fprintf ( stdout, "\nRAPDB QUANTILE KEEP S1: %3d S2: %3d COL: %3d SCORE*100: %d", s1, s2, list[a][0], list[a][1]);
1680 fprintf ( stdout, "\n# MINIMISATION FILTER ON: NiRMSD minimsation resulted in the removal of %d [out of %d] Columns On the alignment %s Vs %s\n", t, n, A->name[s1], A->name[s2]);
1681 for ( a=0; a<=t; a++)
1684 residues[s1][s2][list[a][0]][0]=0;
1685 residues[s1][s2][list[a][0]][1]=0;
1686 residues[s1][s2][list[a][0]][2]=0;
1687 residues[s1][s2][list[a][0]][3]=0;
1688 residues[s1][s2][list[a][0]][4]=-1;
1692 free_int (list, -1);
1694 free_float (normalized_len, -1);
/*
 * quantile_apdb_filtration: for every ordered pair of aligned sequences
 * that both have a Ca trace, clear the entries of `residues` that fall in
 * the quantile selected by PP->filter.
 *
 * Parameters:
 *   A         the alignment; lower-case columns are ignored.
 *   residues  residues[s1][s2][pos][0..4] per-residue measures; field [4]
 *             is the value ranked here.
 *   CL        constraint list providing the Ca traces (CL->T).
 *   PP        options; filter is passed to quantile_rank, print_rapdb
 *             enables a stdout trace.
 *   fp        not used by any visible statement (output goes to stdout).
 *
 * Visible behaviour:
 *   - per pair, list[n] = { residue index, 100 * residues[..][4] } is filled
 *     for every column where both sequences are upper-case and have a
 *     positive position, then sorted with sort_int_inv;
 *   - t = quantile_rank(list, 1, n, PP->filter);
 *   - entries list[0..t-1] get fields [0..3] set to 0 and field [4] to -1;
 *   - list is released with free_int once the pair is done.
 *
 * NOTE(review): the trace prints REMOVE for a < t and KEEP for a > t, so
 *   the entry at a == t (which is kept) is never reported.
 * NOTE(review): in `(int)100*x` the cast binds to the literal, so the
 *   product is computed in floating point and truncated on assignment.
 * NOTE(review): the release of `pos` and the return statement are not
 *   visible here.
 */
1697 float **** quantile_apdb_filtration ( Alignment *A, float ****residues, Constraint_list *CL, Pdb_param *PP,FILE *fp)
1699 int s1, s2, a,col1, n, t;
1702 pos=aln2pos_simple (A, A->nseq);
1703 for ( s1=0; s1< A->nseq; s1++)
1704 for ( s2=0; s2<A->nseq; s2++)
1706 if ( s1==s2) continue;
1707 else if (!(CL->T[A->order[s1][0]]) || !(CL->T[A->order[s2][0]]))continue;
1709 list=declare_int (A->len_aln, 2);
1711 for ( n=0,col1=0; col1< A->len_aln; col1++)
1713 if ( islower (A->seq_al[s1][col1]) || islower ( A->seq_al[s2][col1]))continue;
1714 else if ( pos[s1][col1]<=0 || pos[s2][col1]<=0 ) continue;
1716 list[n][0]=pos[s1][col1]-1;
1717 list[n][1]=(int)100*residues[s1][s2][pos[s1][col1]-1][4];
1722 sort_int_inv (list, 2, 1,0, n-1);
1724 t=quantile_rank ( list,1, n,PP->filter);
1726 if ( PP->print_rapdb)
1728 for ( a=0; a<n; a++)
1730 if ( list[a][1]>0 && a<t)fprintf ( stdout, "\nRAPDB QUANTILE REMOVE S1: %3d S2: %3d COL: %3d SCORE*100: %d", s1, s2, list[a][0], list[a][1]);
1731 else if ( list[a][1]>0 && a>t)fprintf ( stdout, "\nRAPDB QUANTILE KEEP S1: %3d S2: %3d COL: %3d SCORE*100: %d", s1, s2, list[a][0], list[a][1]);
1735 for ( a=0; a<t; a++)
1738 residues[s1][s2][list[a][0]][0]=0;
1739 residues[s1][s2][list[a][0]][1]=0;
1740 residues[s1][s2][list[a][0]][2]=0;
1741 residues[s1][s2][list[a][0]][3]=0;
1742 residues[s1][s2][list[a][0]][4]=-1;
1746 free_int (list, -1);
// analyse_pdb: writes an APDB / iRMSD report for alignment A into the file named
// by `results` and fills ST with per-residue colour codes and per-sequence scores.
// The pairwise, per-sequence (average) and whole-MSA (total) sections are first
// written to three temporary files, then passed to display_file_content together
// with the report stream, followed by a legend.
// Parameters: A = evaluated alignment (its seq_al case is rewritten below);
// ST = alignment receiving colour codes, score_seq, score and generic_comment;
// results = path of the report file.
// NOTE(review): the statements that set CL, PP defaults, b, l1/l2 and the return
// value are not visible here; comments below describe visible statements only.
1751 Alignment * analyse_pdb ( Alignment *A, Alignment *ST, char *results)
1753 int s1,s2,r1, r2,b, p;
1755 float **normalize_len;
1757 float pair_tot=0, pair_m1, pair_m2, pair_m3, pair_m4, pair_m5, pair_len=0;
1758 float seq_tot, seq_m1, seq_m2, seq_m3, seq_m4, seq_m5,seq_len;
1759 float msa_tot, msa_m1, msa_m2, msa_m3, msa_m4, msa_m5, msa_len;
1760 float iRMSD_unit, iRMSD_max, iRMSD_min;
1763 Constraint_list *CL;
1764 char *average_file, *pairwise_file, *total_file, *irmsd_file=0;
1765 FILE *fp, *average,*pairwise, *total, *irmsd_graph=0;
1768 fp =vfopen ( results, "w");
1769 pairwise=vfopen ((pairwise_file=vtmpnam (NULL)),"w");
1770 average =vfopen ((average_file =vtmpnam (NULL)),"w");
1771 total =vfopen ((total_file =vtmpnam (NULL)),"w");
// Take the parameter set from the first sequence that has a CL->T entry.
// NOTE(review): if no entry is set, PP keeps its earlier value and is
// dereferenced right below - confirm it cannot be NULL at that point.
1776 for ( s1=0; s1< (A->S)->nseq; s1++)
1777 if ( CL->T[s1]){PP=(CL->T[s1])->pdb_param;break;}
1779 if (PP->irmsd_graph)irmsd_graph =vfopen ((irmsd_file =vtmpnam (NULL)),"w");
1781 fprintf ( fp, "\nAPDB_RESULT_FORMAT_02\n");
1782 residues=analyse_pdb_residues ( A, A->CL,PP);
// A non-negative PP->filter selects the quantile filter, a negative one the
// irmsdmin filter; the two conditions are complementary.
1783 if ( PP->filter>=0)residues=quantile_apdb_filtration (A, residues, A->CL,PP, fp);
1784 else if ( PP->filter<0)residues=irmsdmin_apdb_filtration (A, residues, A->CL,PP, fp);
1786 pos=aln2pos_simple (A, A->nseq);
1792 /*Compute the alignment length for normalization*/
// normalize_len[s1][s2] = MIN over the two sequences of the number of
// upper-case, non-gap residues.
1793 normalize_len=declare_float (A->nseq+1, A->nseq+1);
1794 for (s1=0; s1<A->nseq; s1++)
1797 for (s2=0; s2<A->nseq; s2++)
1799 for ( l1=l2=p=0; p< A->len_aln; p++)
1801 r1=A->seq_al[s1][p];
1802 r2=A->seq_al[s2][p];
1803 if (!is_gap(r1) && isupper(r1))l1++;
1804 if (!is_gap(r2) && isupper(r2))l2++;
1806 normalize_len[s1][s2]=MIN(l1,l2);
1810 msa_len=msa_tot=msa_m1=msa_m2=msa_m3=msa_m4=msa_m5=0;
// Three nested accumulation levels: pair_* for one (s1,s2) pair, seq_* for one
// sequence s1 against all others, msa_* for the whole alignment.
1812 for ( s1=0; s1< A->nseq; s1++)
1814 if ( !(CL->T[A->order[s1][0]]))continue;
1815 seq_len=seq_tot=seq_m1=seq_m2=seq_m3=seq_m4=seq_m5=0;
1816 for ( s2=0; s2< A->nseq; s2++)
1818 if ( s1==s2)continue;
1819 if ( !(CL->T[A->order[s2][0]]))continue;
1820 pair_tot=pair_m1=pair_m2=pair_m3=pair_m4=pair_m5=0;
1821 for ( p=0; p< A->len_aln; p++)
1823 r1=A->seq_al[s1][p];
1824 r2=A->seq_al[s2][p];
// b is assigned on a line not visible here; it is used as the residue index of
// residues[s1][s2] - presumably pos[s1][p]-1, TODO confirm.
// Columns that are gapped or have field 0 equal to 0 are lower-cased; the
// upper-casing below presumably sits in the matching else branch.
1830 if (is_gap(r1) || is_gap(r2) || residues[s1][s2][b][0]==0)
1832 A->seq_al[s1][p]=tolower(r1);
1833 A->seq_al[s2][p]=tolower(r2);
1837 A->seq_al[s1][p]=toupper(r1);
1838 A->seq_al[s2][p]=toupper(r2);
1843 if ( PP->irmsd_graph && ( is_gap(r1) || is_gap(r2) || residues[s1][s2][b][0]==0))
1846 fprintf ( irmsd_graph, "\n_IRMSD_GRAPH %10s %10s ALN: %c%c iRMSD: -1.00", A->name[s1], A->name[s2],A->seq_al[s1][p], A->seq_al[s2][p]);
1849 if (is_gap(r1) || is_gap(r2) || residues[s1][s2][b][0]==0)continue;
// m2 = field 2 as a percentage of field 0; pair_m3 counts the columns whose
// m2 exceeds PP->similarity_threshold. m4 = field 4 of the same residue.
1853 m2=(residues[s1][s2][b][2]*100)/residues[s1][s2][b][0];
1854 if (m2>PP->similarity_threshold){pair_m3++;}
1858 m4=residues[s1][s2][b][4];
1860 if ( PP->irmsd_graph )
// NOTE(review): this record is tagged IRMSD_GRAPH while the undefined-column
// record above is tagged _IRMSD_GRAPH - confirm the difference is intended.
1862 fprintf ( irmsd_graph, "\nIRMSD_GRAPH %10s %10s ALN: %c%c iRMSD: %.2f", A->name[s1], A->name[s2],A->seq_al[s1][p], A->seq_al[s2][p], m4);
1866 pair_len=normalize_len[s1][s2];
// Pairwise section; a value of -1 is printed whenever the denominator is 0.
1870 fprintf ( pairwise, "\n\n#PAIRWISE: %s Vs %s",A->name[s1], A->name[s2]);
1871 fprintf ( pairwise, "\n\tPAIRWISE EVALUATED: %6.2f %% [%s Vs %s] ", (pair_len==0)?-1:(pair_tot*100)/pair_len,A->name[s1], A->name[s2]);
1872 fprintf ( pairwise, "\n\tPAIRWISE APDB: %6.2f %% [%s Vs %s] ", (pair_tot==0)?-1:(pair_m3*100)/pair_tot,A->name[s1], A->name[s2]);
1873 fprintf ( pairwise, "\n\tPAIRWISE iRMSD: %6.2f Angs [%s Vs %s]", (pair_tot==0)?-1:pair_m4/pair_tot,A->name[s1], A->name[s2]);
1874 fprintf ( pairwise, "\n\tPAIRWISE NiRMSD: %6.2f Angs [%s Vs %s] [%d pos]", (pair_tot==0)?-1:(pair_m4*pair_len)/(pair_tot*pair_tot), A->name[s1], A->name[s2], (int)pair_tot);
1875 fprintf ( pairwise, "\n\tRAPDB PAIRS PAIRWISE N_NONEMPTY_PAIRS %d N_MAXIMUM_PAIRS %d",(int) pair_tot, (int)pair_len);
// Per-sequence (average) section.
1888 fprintf ( average, "\n\n#AVERAGE For Sequence %s", A->name[s1]);
1889 fprintf ( average, "\n\tAVERAGE EVALUATED: %6.2f %% [%s]", (seq_len==0)?-1:(seq_tot*100)/seq_len, A->name[s1]);
1890 fprintf ( average, "\n\tAVERAGE APDB: %6.2f %% [%s]", (seq_tot==0)?-1:(seq_m3*100)/seq_tot, A->name[s1]);
1891 fprintf ( average, "\n\tAVERAGE iRMSD: %6.2f Angs [%s]", (seq_tot==0)?-1:seq_m4/seq_tot, A->name[s1]);
1892 fprintf ( average, "\n\tAVERAGE NiRMS: %6.2f Angs [%s]", (seq_tot==0)?-1:(seq_m4*seq_len)/(seq_tot*seq_tot), A->name[s1]);
// NOTE(review): the next three statements mix pair_tot / pair_len (values left
// over from the last pair) into per-sequence results, whereas the lines above
// use seq_tot / seq_len. This looks like a copy-paste slip; the guard tests
// seq_tot but the divisor is pair_tot, which may be 0 - confirm.
1893 if ( strm (PP->color_mode, "apdb"))ST->score_seq[s1]=(seq_tot==0)?-1:(seq_m3*100)/pair_tot;
1894 if (PP->print_rapdb)fprintf (average, "\n\tRAPDB PAIRS AVERAGE N_NONEMPTY_PAIRS %d N_MAXIMUM_PAIRS %d", (int)pair_tot, (int)pair_len);
1896 if ( strm (PP->color_mode, "irmsd"))ST->score_seq[s1]=(seq_tot==0)?-1:10*((seq_m4*pair_len)/(seq_tot*seq_tot));
// Whole-MSA (total) section.
1899 fprintf ( total, "\n\n#TOTAL for the Full MSA");
1900 fprintf ( total, "\n\tTOTAL EVALUATED: %6.2f %% ", (msa_len==0)?-1:(msa_tot*100)/msa_len);
1901 fprintf ( total, "\n\tTOTAL APDB: %6.2f %% ", (msa_tot==0)?-1:(msa_m3*100)/msa_tot);
1902 fprintf ( total, "\n\tTOTAL iRMSD: %6.2f Angs", (msa_tot==0)?-1:msa_m4/msa_tot);
1903 fprintf ( total, "\n\tTOTAL NiRMSD: %6.2f Angs", (msa_tot==0)?-1:(msa_m4*msa_len)/(msa_tot*msa_tot));
1904 if (PP->print_rapdb)fprintf (total, "\n\tRAPDB PAIRS TOTAL N_NONEMPTY_PAIRS: %d N_MAXIMUM_PAIRS %d", (int)msa_tot, (int)msa_len);
// The alignment-level score is the APDB percentage or 10 x NiRMSD depending on
// PP->color_mode.
1906 if ( strm (PP->color_mode, "apdb")) ST->score_aln=ST->score=A->score_aln=A->score=(msa_tot==0)?-1:(msa_m3*100)/msa_tot;
1907 if ( strm (PP->color_mode, "irmsd"))ST->score_aln=ST->score=A->score_aln=A->score=(msa_tot==0)?-1:10*((msa_m4*msa_len)/(msa_tot*msa_tot));
// Close the temporary sections, then hand each one to display_file_content
// with the report stream, in the order pairwise, average, total, graph.
1909 vfclose (average);vfclose (total); vfclose (pairwise);if (PP->irmsd_graph)vfclose (irmsd_graph);
1910 fp=display_file_content (fp, pairwise_file);
1911 fp=display_file_content (fp, average_file);
1912 fp=display_file_content (fp, total_file);
1913 if ( PP->irmsd_graph)fp=display_file_content (fp, irmsd_file);
// Legend. NOTE(review): the labels below spell iRMDS / NiRMDS while the result
// lines use iRMSD / NiRMSD; they are runtime strings and are left untouched.
1915 fprintf ( fp, "\n\n# EVALUATED: Fraction of Pairwise Columns Evaluated\n");
1916 fprintf ( fp, "# APDB: Fraction of Correct Columns according to APDB\n");
1917 fprintf ( fp, "# iRMDS: Average iRMSD over all evaluated columns\n");
1918 fprintf ( fp, "# NiRMDS: iRMSD*MIN(L1,L2)/Number Evaluated Columns\n");
1919 fprintf ( fp, "# Main Parameter: -maximum_distance %.2f Angstrom\n", PP->maximum_distance);
1921 fprintf ( fp, "# Undefined values are set to -1 and indicate LOW Alignment Quality\n");
1922 fp=print_program_information (fp, NULL);
// Range of positive field-4 values, used to scale the irmsd colour codes.
// NOTE(review): iRMSD_min starts at 10000 and is updated with MAX, so it can
// never decrease; MIN was presumably intended. It is not read by any visible
// statement.
1928 for (iRMSD_max=0,iRMSD_min=10000,s1=0; s1<A->nseq; s1++)
1929 for ( s2=0; s2< A->nseq; s2++)
1930 for (p=0; p<A->len_aln; p++)
1932 if ( residues[s1][s2][p][4]>0)
1934 iRMSD_max=MAX(iRMSD_max, residues[s1][s2][p][4]);
1935 iRMSD_min=MAX(iRMSD_min, residues[s1][s2][p][4]);
// Eight colour steps span the range from 0 to iRMSD_max.
1939 iRMSD_unit=iRMSD_max/8;
// NOTE(review): the outer loop over p and the inner loop over p share the same
// variable, so the inner loop leaves p at len_aln and the outer loop body runs
// only once - the outer loop looks redundant; confirm.
1941 for (p=0; p< A->len_aln; p++)
1942 for ( s1=0; s1< A->nseq; s1++)
1945 for ( p=0; p< A->len_aln; p++)
1947 r1=A->seq_al[s1][p];
// Gaps and sequences without a CL->T entry get the no-colour code.
1949 if ( is_gap(r1) || !(CL->T[A->order[s1][0]]))
1950 ST->seq_al[s1][p]=NO_COLOR_RESIDUE;
1953 float tot_m2=0, tot_m4=0, v=0;
1956 for (s2=0; s2< A->nseq; s2++)
// NOTE(review): r2 is read from row s1 and the template test below also uses
// s1, so neither depends on s2; A->seq_al[s2][p] and A->order[s2][0] were
// presumably intended - confirm.
1958 r2=A->seq_al[s1][p];
1959 if ( s1==s2) continue;
1960 if (is_gap(r2) || !(CL->T[A->order[s1][0]]) || residues[s1][s2][b][0]==0)continue;
1962 seq_m2+=m2=(residues[s1][s2][b][2]*100)/residues[s1][s2][b][0];
1965 m4=residues[s1][s2][b][4];
// Colour code v: in apdb mode the mean percentage divided by 10, capped at 9;
// in irmsd mode 9 minus the number of whole iRMSD_unit steps in the mean iRMSD.
// A zero counter yields NO_COLOR_RESIDUE.
1973 if (strm ( PP->color_mode, "apdb"))
1975 if (tot_m2==0)v=NO_COLOR_RESIDUE;
1976 else v=MIN((seq_m2/(10*tot_m2)),9);
1978 else if ( strm (PP->color_mode, "irmsd"))
1980 if ( tot_m4==0)v=NO_COLOR_RESIDUE;
1981 else v=(8-(int)((seq_m4/(iRMSD_unit*tot_m4))))+1;
1983 ST->seq_al[s1][p]=v;
// The extra row at index nseq is filled with the no-colour code.
1988 for ( p=0; p<A->len_aln; p++) ST->seq_al[A->nseq][p]=NO_COLOR_RESIDUE;
// NOTE(review): the buffer is sized as 100 ints but is used as a character
// string by the sprintf calls below; the size in bytes therefore depends on
// sizeof(int) - confirm the longest comment always fits.
1991 ST->generic_comment=vcalloc ( 100, sizeof (int));
1992 if ( strm (PP->color_mode, "apdb"))
1994 sprintf ( ST->generic_comment, "# APDB Evaluation: Color Range Blue-[0 %% -- 100 %%]-Red\n# Sequence Score: APDB\n# Local Score: APDB\n\n");
1996 else if ( strm (PP->color_mode, "irmsd"))
1998 sprintf ( ST->generic_comment, "\n# iRMSD Evaluation:\n# Sequence score: NiRMSD (Angstrom*10)\n# Local Score: iRMSD, Blue-[%.2f Ang. -- 0.00 Ang.]-Red \n", iRMSD_max);
2001 fprintf ( fp, "\n");
// analyse_pdb_residues: builds the table distances[s1][s2][residue][field] that
// the filters and analyse_pdb consume.
// For every aligned residue pair (column col1 of sequences s1 and s2) it scans
// the other columns col2, reads the intra-structure distances d1 (structure of
// s1) and d2 (structure of s2) between the col1 and col2 residues from the
// ca_dist matrices, and accumulates statistics on delta = |d1 - d2|.
// Parameters: A = alignment; CL = constraint list providing the templates T[];
// pdb_param = analysis parameters.
// NOTE(review): the body reads PP, whose assignment is not visible here
// (presumably PP=pdb_param) - confirm. The return statement is not visible
// either; presumably `distances` is returned.
2006 float **** analyse_pdb_residues ( Alignment *A, Constraint_list *CL, Pdb_param *pdb_param)
2010 int s1, s2, rs1, rs2;
2012 float ****distances;
2014 /*Distances[Nseq][len_aln][4]
2015 distances...[0]: Number of residues within the bubble
2016 distances...[1]: Absolute difference of distance of residues within Bubble
2017 distances...[2]: Number of residues within the bubble with Delta dist < md_threshold
2018 distances ..[3]: Sum of squared difference of distances
2019 distances ..[4]: iRMSD
2024 int real_res1_col1=0;
2030 float nrapdb, rapdb;
2034 print_rapdb=PP->print_rapdb;
2036 distances=declare_arrayN(4, sizeof (float), A->nseq, A->nseq, 0, 0);
2038 /*Pre-computation of the internal distances----> T[seq]->ca_dist[len][len]*/
2039 /*Can be avoided if distance_on_request set to 1 */
// Compute the ca_dist matrix of each template that does not have one yet, and
// give every (s1,s2) cell a len_aln x 6 float table.
2041 for ( s1=0; s1< A->nseq; s1++)
2043 rs1=A->order[s1][0];
2044 if (CL->T[rs1] && !(CL->T[rs1])->ca_dist)(CL->T[rs1])->ca_dist=measure_ca_distances(CL->T[rs1]);
2045 for ( s2=0; s2< A->nseq; s2++)
2047 distances[s1][s2]=declare_float ( A->len_aln, 6);
2050 pos=aln2pos_simple (A, A->nseq);
2052 for ( s1=0; s1< A->nseq; s1++)
2053 for ( col1=0; col1< A->len_aln; col1++)
2054 for ( s2=0; s2<A->nseq; s2++)
// rs1 / rs2 are the A->order[..][0] values used to index CL->T.
2056 rs1=A->order[s1][0];
2057 rs2=A->order[s2][0];
// Skip self comparison, sequences without template, lower-case residues and
// columns where either pos value is not positive.
2060 if ( s1==s2) continue;
2061 else if (!(CL->T[rs1]) || !(CL->T[rs2]))continue;
2062 else if ( islower (A->seq_al[s1][col1]) || islower ( A->seq_al[s2][col1]))continue;
2063 else if ( pos[s1][col1]<=0 || pos[s2][col1]<=0 ) continue;
// Optional trace on stdout, printed once per unordered pair (s2>s1). BA is a
// lower-cased copy of A in which the residues taking part are upper-cased.
2065 if ( print_rapdb && s2>s1)
2068 fprintf ( stdout, "RAPDB S1: %s S2: %s POS %d %d %c %d %c ", A->name[s1], A->name[s2], col1+1, pos[s1][col1],A->seq_al[s1][col1], pos[s2][col1],A->seq_al[s2][col1]);
2069 BA=copy_aln (A, BA);
2070 lower_string (BA->seq_al[s1]);
2071 lower_string (BA->seq_al[s2]);
2072 BA->seq_al[s1][col1]=toupper (BA->seq_al[s1][col1]);
2073 BA->seq_al[s2][col1]=toupper (BA->seq_al[s2][col1]);
2076 for ( col2=0; col2<A->len_aln; col2++)
// A neighbour column is ignored when either pos value is not positive, when it
// lies within PP->n_excluded_nb positions of col1 in either sequence, or when
// either residue is lower-case.
2079 if (pos[s1][col2]<=0 || pos[s2][col2]<=0 )continue;
2080 else if ( FABS((pos[s1][col2]-pos[s1][col1]))<=PP->n_excluded_nb)continue;
2081 else if ( FABS((pos[s2][col2]-pos[s2][col1]))<=PP->n_excluded_nb)continue;
2082 else if ( islower (A->seq_al[s1][col2]) || islower ( A->seq_al[s2][col2]))continue;
// Residue indices are pos-1.
2084 real_res1_col1=pos[s1][col1]-1;
2085 real_res1_col2=pos[s1][col2]-1;
2087 real_res2_col1=pos[s2][col1]-1;
2088 real_res2_col2=pos[s2][col2]-1;
2090 d1=(CL->T[rs1])->ca_dist[real_res1_col1][real_res1_col2];
2091 d2=(CL->T[rs2])->ca_dist[real_res2_col1][real_res2_col2];
2093 if ( d1==UNDEFINED || d2 == UNDEFINED) continue;
// Neighbourhood test: in sphere mode both distances must be below
// PP->maximum_distance; in window mode the same threshold is applied to the
// separations in pos values instead.
// NOTE(review): no visible statement sets in_bubble for any other local_mode.
2097 if ( strm ( PP->local_mode, "sphere"))
2099 in_bubble= (d1<PP->maximum_distance && d2<PP->maximum_distance)?1:0; ;
2101 else if ( strm ( PP->local_mode, "window"))
2103 wd1=FABS((pos[s1][col2]-pos[s1][col1]));
2104 wd2=FABS((pos[s2][col2]-pos[s2][col1]));
2105 in_bubble= (wd1<PP->maximum_distance && wd2<PP->maximum_distance)?1:0; ;
// The statements below are presumably guarded by a test on in_bubble that is
// not visible here - confirm.
2110 if ( print_rapdb && s2 >s1)
2112 fprintf ( stdout, "NB %d %d %c %d %c ", col2, pos[s1][col2], A->seq_al[s1][col2], pos[s2][col2], A->seq_al[s2][col2]);
2113 BA->seq_al[s1][col2]=toupper (BA->seq_al[s1][col2]);
2114 BA->seq_al[s2][col2]=toupper (BA->seq_al[s2][col2]);
// Accumulate into the row of residue real_res1_col1: field 2 counts neighbours
// with delta below PP->md_threshold, field 1 sums delta, field 0 counts
// neighbours, field 3 sums delta squared.
2116 delta=FABS((d1-d2));
2117 if (delta<PP->md_threshold)
2118 distances[s1][s2][real_res1_col1][2]++;
2119 distances[s1][s2][real_res1_col1][1]+=delta;
2120 distances[s1][s2][real_res1_col1][0]++;
2121 distances[s1][s2][real_res1_col1][3]+=delta*delta;
// Field 4 is sqrt(rapdb/nrapdb), or -1 when nrapdb is 0. The assignments of
// rapdb and nrapdb are not visible; presumably they are fields 3 and 0.
2127 if ( nrapdb==0)distances[s1][s2][real_res1_col1][4]=-1;
2128 else distances[s1][s2][real_res1_col1][4]=(float)sqrt((double)(rapdb/nrapdb));
2130 if ( print_rapdb && s2>s1)
2134 fprintf ( stdout, "APDB: UNDEFINED\n");
2139 fprintf ( stdout, " APDB: %.2f ",(float)sqrt((double)(rapdb/nrapdb)));
2140 BA->residue_case=KEEP_CASE;unalign_residues (BA, s1, s2);
2141 fprintf ( stdout,"SEQ1: %s %s SEQ2: %s %s\n", BA->name[s1], BA->seq_al[s1], BA->name[s2], BA->seq_al[s2]);
// Forward declarations of the helpers used by msa2struc_dist; their definitions
// follow that function in this file.
2151 int pair_res_suitable4trmsd (int s1,int col1, int col2, Alignment *A, int **pos, Pdb_param *PP, Constraint_list *CL,int *s);
2152 int aln_column_contains_gap (Alignment *A, int c);
2153 float aln2ncol4trmsd(Alignment *A, int **pos, Constraint_list *CL, int **lc);
2154 int pair_columns_suitable4trmsd(int col1, int col2, Alignment *A, int **pos, Pdb_param *PP, Constraint_list *CL,int *s);
2155 int column_is_suitable4trmsd(int col1,Alignment *A, int **pos, Pdb_param *PP, Constraint_list *CL,int *s);
2159 NT_node trmsdmat2tree (float **dm, int **count,Alignment *A);
// msa2struc_dist: derives trees from structural distance differences.
// For each alignment column col1 it accumulates, over all partner columns col2
// and all sequence pairs, the absolute difference between the intra-structure
// distances (col1,col2) of the two sequences, turns the resulting matrix into a
// tree with trmsdmat2tree, and then writes a tree list, filtered bootstrap
// trees and a colored MSA to files whose names start with `results`.
// Parameters: A = alignment (sequences without usable structure are removed
// from it); ST = not referenced by any visible statement; results = output file
// name prefix; gapped = when 0, columns containing a gap are skipped;
// min_ncol4trmsd = minimum number of usable columns required.
// NOTE(review): the function ends with exit (EXIT_SUCCESS) in the visible code,
// so it terminates the process instead of returning - confirm callers expect it.
2160 Alignment * msa2struc_dist ( Alignment *A, Alignment *ST, char *results, int gapped, int min_ncol4trmsd)
2165 int s1, s2, rs1, rs2;
2167 float ****distances;
2169 int **count,**tcount;
2170 int print_subtrees=0;
2173 /*Distances[Nseq][len_aln][4]
2174 distances...[0]: Number of residues within the bubble
2175 distances...[1]: Absolute difference of distance of residues within Bubble
2176 distances...[2]: Number of residues within the bubble with Delta dist < md_threshold
2177 distances ..[3]: Sum of squared difference of distances
2178 distances ..[4]: iRMSD
2180 Pdb_param *pdb_param;
2181 Constraint_list *CL;
2182 int a, b, ncol, npos,n;
2186 int real_res1_col1=0;
2192 float nrapdb, rapdb;
2194 NT_node *T0,*T1,*T2,*PT, *POS;
2195 NT_node BT0, BT10,BT50, BT100=NULL,RBT;
2196 char **pair_pos_list;
2198 int ntree=0, ntree2;
2205 char *struc_tree100;
2208 char *consense_file;
2210 char *color_struc_tree;
// A negative min_ncol4trmsd is rescaled as a percentage of the alignment
// length (a sign flip is presumably done on a line not visible here); a value
// of len_aln or more is clamped to len_aln-1.
2216 if (min_ncol4trmsd<0)
2219 min_ncol4trmsd=(min_ncol4trmsd*A->len_aln)/100;
2221 else if ( min_ncol4trmsd>=A->len_aln)
2223 min_ncol4trmsd=A->len_aln-1;
// lc[s][0] = sequence index, lc[s][1] = count filled in by aln2ncol4trmsd.
2226 lc=declare_int (A->nseq, 2);
2227 for (a=0; a<A->nseq; a++)lc[a][0]=a;
// Output file names, all derived from `results`.
2229 declare_name(tot_pos_list);
2230 sprintf ( tot_pos_list, "%s.struc_tree.list", results);
2232 declare_name(consense_file);
2233 sprintf (consense_file, "%s.struc_tree.consense_output", results);
2235 declare_name(pos_list);
2236 sprintf ( pos_list, "%s.pos_list", results);
2238 declare_name(struc_tree0);
2239 sprintf ( struc_tree0, "%s.struc_tree.consensus",results);
2241 declare_name(struc_tree10);
2242 sprintf ( struc_tree10, "%s.struc_tree10",results);
2244 declare_name(struc_tree100);
2245 sprintf ( struc_tree100, "%s.struc_tree100",results);
2247 declare_name(struc_tree50);
2248 sprintf ( struc_tree50, "%s.struc_tree50",results);
2250 declare_name(color_struc_tree);
2251 sprintf ( color_struc_tree, "%s.struc_tree.html", results);
// Work arrays; several are sized by the square of the alignment length.
2253 pair_pos_list=declare_char (A->len_aln*A->len_aln+1, 100);
2254 T1=vcalloc (A->len_aln*A->len_aln+1, sizeof (NT_node));
2255 T2=vcalloc (A->len_aln+1, sizeof (NT_node));
2257 PT=vcalloc (A->len_aln*A->len_aln+1, sizeof (NT_node));
2258 POS=vcalloc (A->len_aln+1, sizeof (NT_node));
2262 //Check all sequences have a PDB structure
// Sequences without a template file trigger a warning; the others are copied
// down to slot `used`, compacting name, seq_al and order in place.
2264 for (used=0,a=0; a<A->nseq; a++)
2266 if ( ! seq2P_template_file(A->S,a))
2268 add_warning (stderr, "Sequence %s removed from the dataset [No Usable Structure]", A->name[a]);
2274 sprintf (A->name[used], "%s", A->name[a]);
2275 sprintf (A->seq_al[used], "%s", A->seq_al[a]);
2276 for (b=0; b<4; b++)A->order[used][b]=A->order[a][b];
2284 if (A->nseq<2)myexit (fprintf_error(stderr, "Two sequences at least must have a known structure"));
// Take the parameter set from the first sequence that has a CL->T entry.
2286 for ( s1=0; s1< (A->S)->nseq; s1++)
2287 if ( CL->T[s1]){PP=(CL->T[s1])->pdb_param;break;}
2289 for ( s1=0; s1< A->nseq; s1++)
2291 rs1=A->order[s1][0];
2292 if (CL->T[rs1] && !(CL->T[rs1])->ca_dist)(CL->T[rs1])->ca_dist=measure_ca_distances(CL->T[rs1]);
2294 pos=aln2pos_simple (A, A->nseq);
// dm / count are per-column accumulators (reset by trmsdmat2tree); tdm / tcount
// accumulate over all columns.
2296 dm=declare_float (A->nseq, A->nseq);
2297 count=declare_int (A->nseq, A->nseq);
2298 tdm=declare_float (A->nseq, A->nseq);
2299 tcount=declare_int (A->nseq, A->nseq);
// NOTE(review): this overwrites fields of the parameter object obtained from
// CL->T, so the change is visible to any other user of that object - confirm.
2301 PP->maximum_distance=1000;
2302 sprintf ( PP->local_mode, "sphere");
// While too few columns are usable, drop the sequence that sort_int_inv puts
// first on the lc[..][1] count and recompute pos.
2304 while ((npos=aln2ncol4trmsd(A,pos,CL,lc))<min_ncol4trmsd && A->nseq>1)
2307 sort_int_inv (lc,2, 1, 0,A->nseq-1);
2308 add_information (stderr, "Remove Sequence [%s] that contains %d un-suitable positions", A->name[lc[0][0]], lc[0][1]);
2309 A=remove_seq_from_aln (A, A->name[lc[0][0]]);
2311 pos=aln2pos_simple (A, A->nseq);
2315 myexit (fprintf_error(stderr,"No suitable pair of column supporting a tree"));
2318 fprintf ( stderr, "\n---- Number of usable positions: %d [%.2f %%]\n", npos, ((float)npos*100)/(float)A->len_aln);
// Main sampling loop: one candidate tree per column col1.
2320 tl=vfopen (tot_pos_list, "w");
2321 for (ncol=0,ntree=0, col1=0; col1< A->len_aln; col1++)
2324 //output_completion (stderr, col1, A->len_aln,1, "Sample Columns");
2325 if (!gapped && aln_column_contains_gap (A, col1))continue;
2326 for ( cont=1,ntree2=0,col2=0; col2<A->len_aln; col2++)
2328 for (s1=0; s1< A->nseq-1; s1++)
2330 rs1=A->order[s1][0];
2331 if (!pair_res_suitable4trmsd (s1,col1, col2, A, pos, PP, CL, &w))continue;
2332 for ( s2=s1+1; s2<A->nseq; s2++)
2334 if (!pair_res_suitable4trmsd (s2,col1, col2, A, pos, PP, CL, &w))continue;
2336 rs2=A->order[s2][0];
2337 real_res1_col1=pos[s1][col1]-1;
2338 real_res1_col2=pos[s1][col2]-1;
2339 real_res2_col1=pos[s2][col1]-1;
2340 real_res2_col2=pos[s2][col2]-1;
2342 d1=(CL->T[rs1])->ca_dist[real_res1_col1][real_res1_col2];
2343 d2=(CL->T[rs2])->ca_dist[real_res2_col1][real_res2_col2];
// delta is added to the symmetric cell and the result is mirrored, for both
// the per-column matrix dm and the global matrix tdm.
2345 delta=FABS((d1-d2));
2346 dm[s1][s2]=dm[s2][s1]+=delta;
2347 tdm[s1][s2]=tdm[s2][s1]+=delta;
// A non-null tree for this column is stored in POS[col1] and T1[ntree] and
// appended to the tree-list file.
2359 if ((POS[col1]=trmsdmat2tree (dm, count, A)))
2361 T1[ntree]=POS[col1];
2362 fprintf (tl, "\n>Tree_%d Column\n", col1+1);
2363 print_tree (T1[ntree], "newick", tl);
// NOTE(review): this fatal error path terminates with EXIT_SUCCESS, so a
// calling script cannot detect the failure from the exit status - confirm.
2369 if (!ntree){fprintf ( stderr, "\nERROR: No suitable pair of column supporting a tree [FATAL]\n"); exit (EXIT_SUCCESS);}
2371 score=treelist2avg_treecmp (T1, NULL);
2372 display_output_filename( stderr,"TreeList","newick",tot_pos_list, CHECK);
2374 if (treelist_file2consense (tot_pos_list, NULL, consense_file))
2376 display_output_filename( stderr,"ConsenseTree","phylip",consense_file, CHECK);
2380 fprintf ( stderr, "\nPhylip is not installed: the program could not produce the consense output. This is not mandatory but useful");
// NOTE(review): the BT100 tree and the BT0 tree below are both written to the
// file named by struc_tree0, while the struc_tree100 name built above is not
// used by any visible statement - confirm which branch is active.
2385 if ((BT100=treelist2filtered_bootstrap (T1, NULL,score, 1.0)))
2387 vfclose (print_tree (BT100,"newick", vfopen (struc_tree0, "w")));
2388 display_output_filename( stderr,"Tree","newick",struc_tree0, CHECK);
2393 if ( (BT0=trmsdmat2tree (tdm, tcount, A)))
2395 vfclose (print_tree (BT0,"newick", vfopen (struc_tree0, "w")));
2396 display_output_filename( stderr,"Tree","newick",struc_tree0, CHECK);
2398 if ((BT10=treelist2filtered_bootstrap (T1, NULL,score, 0.1)))
2400 vfclose (print_tree (BT10,"newick", vfopen (struc_tree10, "w")));
2401 display_output_filename( stderr,"Tree","newick",struc_tree10, CHECK);
2404 if ((BT50=treelist2filtered_bootstrap (T1, NULL, score,0.5)))
2406 vfclose (print_tree (BT50,"newick", vfopen (struc_tree50, "w")));
2407 display_output_filename( stderr,"Tree","newick",struc_tree50, CHECK);
2412 if (!BT100)BT100=treelist2filtered_bootstrap (T1, NULL,score, 1.0);
// Colored MSA: B is a copy of A whose residues are replaced, column by column,
// by S->uw/10 from the comparison of the column tree with the reference tree
// RBT, or by NO_COLOR_RESIDUE for gaps and columns without score.
2417 B=copy_aln (A, NULL);
2418 for (a=0; a<A->len_aln; a++)
2425 S=tree_cmp (POS[a], RBT);
2430 score=NO_COLOR_RESIDUE;
2433 for (b=0; b<B->nseq; b++)
2435 if ( is_gap (B->seq_al[b][a]) || score == NO_COLOR_RESIDUE)
2437 B->seq_al[b][a]=NO_COLOR_RESIDUE;
2441 B->seq_al[b][a]=S->uw/10;
2447 output_format_aln ("score_html", A,B,color_struc_tree);
2448 display_output_filename( stderr,"Colored MSA","score_html",color_struc_tree, CHECK);
2450 fprintf ( stderr, "\n");
2452 fprintf ( stderr, "\n");
2454 exit (EXIT_SUCCESS);
// trmsdmat2tree: turns an accumulated distance matrix into a tree.
// dm[s1][s2] holds summed distances and count[s1][s2] the number of terms; the
// matrix is averaged in place, rescaled to integers relative to its maximum and
// passed to compute_std_tree_2 with the _TMODE_upgma option string.
// Side effect: dm and count are reset to 0 before returning, so the caller can
// reuse them for the next column.
// NOTE(review): ns is assigned on a line not visible here (presumably A->nseq);
// the return statement is not visible either (presumably T).
2457 NT_node trmsdmat2tree (float **dm, int **count,Alignment *A)
2466 for (s1=0; s1<ns-1; s1++)
2467 for (s2=s1+1; s2<ns; s2++)
// Average over the number of accumulated terms when there is at least one.
2469 if ( count [s1][s2])dm[s1][s2]=dm[s2][s1]=dm[s1][s2]/(float)count[s1][s2];
// The first pair initialises min and max, later pairs widen the range.
2474 if (s1==0 && s2==1)min=max=dm[s1][s2];
2475 min=MIN(dm[s1][s2], min);
2476 max=MAX(dm[s1][s2], max);
2478 dm_int=declare_int (ns, ns);
// NOTE(review): these loops use A->nseq while dm_int is sized with ns and the
// loops above use ns - confirm both are always equal.
2479 for (s1=0; s1<A->nseq-1; s1++)
2480 for (s2=s1+1; s2<A->nseq; s2++)
// Distances are expressed as a percentage of the largest one.
// NOTE(review): if max is 0 this divides by zero before the conversion to int;
// check whether an earlier, non-visible test rules that case out.
2482 dm_int[s1][s2]=dm_int[s2][s1]=((dm[s1][s2])/(max))*100;
2484 T=compute_std_tree_2(A, dm_int, "_TMODE_upgma");
2485 free_int (dm_int, -1);
2486 for (s1=0; s1<ns; s1++)for ( s2=0; s2<ns; s2++){dm[s1][s2]=count[s1][s2]=0;}
// pair_res_suitable4trmsd: tells whether the residues of sequence s1 at columns
// col1 and col2 can contribute a distance.
// Each visible test returns 0 after storing s1 in s[0]: no CL->T entry, a gap or
// a lower-case residue at either column, the two residues lying within
// PP->n_excluded_nb positions of each other, or an UNDEFINED ca_dist value.
// NOTE(review): rs is assigned on a line not visible here (presumably
// A->order[s1][0]); the success return is not visible (presumably 1).
2490 int pair_res_suitable4trmsd (int s1,int col1, int col2, Alignment *A, int **pos, Pdb_param *PP, Constraint_list *CL,int *s)
2494 if ( !(CL->T[rs])){s[0]=s1; return 0;}
2495 else if (is_gap (A->seq_al[s1][col1])){s[0]=s1;return 0;}
2496 else if (is_gap (A->seq_al[s1][col2])){s[0]=s1;return 0;}
2498 else if (islower(A->seq_al[s1][col1])){s[0]=s1; return 0;}
2499 else if (islower(A->seq_al[s1][col2])){s[0]=s1; return 0;}
2501 else if ( FABS(((pos[s1][col2])-(pos[s1][col1])))<=PP->n_excluded_nb){s[0]=s1;return 0;}
// The gap tests above come first, so pos-1 is only used as an index for
// non-gap residues.
2502 else if ((CL->T[rs])->ca_dist[pos[s1][col1]-1][pos[s1][col2]-1]==UNDEFINED){s[0]=s1;return 0;}
// pair_columns_suitable4trmsd: column-level variant of the suitability test.
// Both columns must pass column_is_suitable4trmsd; then, for every sequence,
// the two residues must be more than PP->n_excluded_nb positions apart and
// their ca_dist value must be defined and not above PP->maximum_distance.
// On the first failure s[0] receives the offending sequence index and 0 is
// returned. The success return is not visible here (presumably 1), and rs is
// assigned on a non-visible line.
2505 int pair_columns_suitable4trmsd(int col1, int col2, Alignment *A, int **pos, Pdb_param *PP, Constraint_list *CL,int *s)
2508 if (!column_is_suitable4trmsd (col1, A, pos, PP, CL,s))return 0;
2509 if (!column_is_suitable4trmsd (col2, A, pos, PP, CL,s))return 0;
2510 for (s1=0; s1<A->nseq; s1++)
2515 if ( FABS(((pos[s1][col2])-(pos[s1][col1])))<=PP->n_excluded_nb){s[0]=s1;return 0;}
2516 if ((CL->T[rs])->ca_dist[pos[s1][col1]-1][pos[s1][col2]-1]==UNDEFINED){s[0]=s1;return 0;}
// rr1 / rr2 are the same pos-1 indices as used in the test just above.
2517 rr1=pos[s1][col1]-1;
2518 rr2=pos[s1][col2]-1;
2519 if ((CL->T[rs])->ca_dist[rr1][rr2]>PP->maximum_distance){s[0]=s1;return 0;}
// column_is_suitable4trmsd: a column is rejected (return 0, s[0] set to the
// sequence index) as soon as one sequence has no CL->T entry, a gap, or a
// lower-case residue at col1.
// NOTE(review): rs is assigned on a line not visible here; the success return
// is not visible either (presumably 1). pos and PP are not referenced by the
// visible statements.
2523 int column_is_suitable4trmsd(int col1,Alignment *A, int **pos, Pdb_param *PP, Constraint_list *CL,int *s)
2526 for ( s1=0; s1<A->nseq; s1++)
2530 if ( !(CL->T[rs])){s[0]=s1; return 0;}
2531 else if (is_gap (A->seq_al[s1][col1])){s[0]=s1;return 0;}
2532 else if (islower(A->seq_al[s1][col1])){s[0]=s1; return 0;}
// aln_column_contains_gap: returns 1 when at least one sequence of A has a gap
// at column c.
// A NULL alignment or a column index outside [0, len_aln) is treated as fatal:
// a message is printed and the process exits with EXIT_FAILURE.
// The return for a gap-free column is not visible here (presumably 0).
2536 int aln_column_contains_gap (Alignment *A, int c)
2539 if ( !A || c>=A->len_aln || c<0)
// NOTE(review): the error goes to stdout via printf while other fatal messages
// in this file use stderr, and the tag reads FATL; both are runtime text and
// are left untouched.
2541 printf ( "\nERROR: values out of range in aln_column_contains_gap [FATL:%s]\n", PROGRAM);
2542 exit (EXIT_FAILURE);
2544 for ( a=0; a<A->nseq; a++) if ( is_gap(A->seq_al[a][c]))return 1;
// aln2ncol4trmsd: scans every column and, per sequence, counts in lc[s][1] the
// positions that cannot be used: those whose pos value minus 1 is negative and
// those whose ca entry in the template is null. lc[s][0] is set to s.
// NOTE(review): the return statement is not visible here (presumably the
// number of usable columns, ncol). The declared return type is float although
// the caller in msa2struc_dist stores the result in an int.
2549 float aln2ncol4trmsd(Alignment *A, int **pos, Constraint_list *CL, int **lc)
2551 //This function estimates the number of columns suitable for constructing a trmsd
2552 int col1, s1, ncol, n, rs1, real_res1_col1;
2554 for (s1=0; s1<A->nseq; s1++){lc[s1][0]=s1; lc[s1][1]=0;}
2555 for (ncol=0,col1=0; col1< A->len_aln; col1++)
2557 for (n=0,s1=0; s1<A->nseq; s1++)
2559 real_res1_col1=pos[s1][col1]-1;
2560 rs1=A->order[s1][0];
2562 if (real_res1_col1<0)lc[s1][1]++;
// NOTE(review): CL->T[...] is dereferenced without a visible NULL test, unlike
// the other helpers in this file - confirm every sequence has a template here.
2563 else if (!((CL->T[A->order[s1][0]])->ca[real_res1_col1]))lc[s1][1]++;
// square_atom: returns the sum of the squared x, y and z fields of X, i.e. the
// squared length of the vector from the origin to X.
2574 float square_atom ( Atom *X)
2577 return X->x*X->x + X->y*X->y + X->z*X->z;
// reframe_atom: computes new coordinates for IN as the linear combination
// IN->x * X + IN->y * Y + IN->z * Z, taken component by component.
// R is the destination; a zero-initialised Atom is allocated when R is NULL.
// NOTE(review): the statements that store new_x / new_y / new_z and return are
// not visible here (presumably they are written into R, which is returned).
2579 Atom* reframe_atom ( Atom *X, Atom*Y, Atom *Z, Atom *IN, Atom *R)
2581 float new_x, new_y, new_z;
2583 if ( R==NULL)R=vcalloc ( 1, sizeof (Atom));
2586 new_x= X->x*IN->x + Y->x*IN->y +Z->x*IN->z;
2587 new_y= X->y*IN->x + Y->y*IN->y +Z->y*IN->z;
2588 new_z= X->z*IN->x + Y->z*IN->y +Z->z*IN->z;
// add_atom: combines atoms A and B into R; a zero-initialised Atom is allocated
// when R is NULL.
// NOTE(review): the arithmetic and the return are not visible here; going by
// the name, R presumably receives the component-wise sum - confirm.
2596 Atom* add_atom ( Atom *A, Atom*B, Atom *R)
2598 if ( R==NULL)R=vcalloc ( 1, sizeof (Atom));
// diff_atom: combines atoms A and B into R; a zero-initialised Atom is
// allocated when R is NULL.
// NOTE(review): the arithmetic and the return are not visible here; going by
// the name, R presumably receives the component-wise difference - confirm the
// operand order.
2606 Atom* diff_atom ( Atom *A, Atom*B, Atom *R)
2608 if ( R==NULL)R=vcalloc ( 1, sizeof (Atom));
// copy_atom: copies the fields of A into R, allocating a zero-initialised Atom
// when R is NULL. The visible statements copy res_num and the type string; the
// remaining field copies and the return are on lines not visible here.
2617 Atom * copy_atom ( Atom *A, Atom*R)
2619 if ( R==NULL)R=vcalloc ( 1, sizeof (Atom));
2621 R->res_num=A->res_num;
// The type string is copied with sprintf, so R->type must be at least as large
// as A->type.
2626 sprintf( R->type, "%s", A->type);
// print_atom: prints the x, y and z fields of A on stdout with two decimals,
// separated by spaces and without a trailing newline.
2629 void print_atom (Atom *A)
2631 fprintf ( stdout, "%.2f %.2f %.2f", A->x, A->y, A->z);
2633 /************************************************************************/
2637 /************************************************************************/
2639 /*---------prototypes ----------*/
// File-local helpers of nussinov: M is the square work matrix, S the sequence,
// P the output parenthesis string, l the sequence length and T the minimum
// distance between paired residues.
2640 static void computeBasePairMatrix(int**M,char*S,int l, int T);
2641 static int backtrack(int a,int b,int**M,char*S,char*P, int T);
// basePair: looks up whether characters x and y form a base pair in a 256 x 256
// table built from the alphabet AGCTUagctu.
// Visible rules, tested case-insensitively on (c1, c2) in alphabet order:
// (g,c), (a,u) and (u,g); each cell is then mirrored into mat[c2][c1].
// NOTE(review): the table declaration, any build-once guard and the values
// stored by the three rules are on lines not visible here.
2645 static int basePair(char x, char y)
2652 int a, b, c1, c2, lc1, lc2;
2653 mat=declare_short (256, 256);
2654 sprintf ( alp, "AGCTUagctu");
2655 for (a=0; a<strlen (alp); a++)
// NOTE(review): b starts at a and stops before the last alphabet character, so
// the final lower-case u is never used as c2, and pairs are only tested in one
// order; combined with the ordered rules below, some case combinations of G-U
// may never be set - confirm against the intended pairing table.
2657 for (b=a; b<strlen (alp)-1; b++)
2659 c1=alp[a];c2=alp[b];
2660 lc1=tolower(c1); lc2=tolower(c2);
2661 if ( lc1=='g' && lc2=='c')
2663 else if ( lc1=='a' && lc2=='u')
2665 else if ( lc1=='u' && lc2=='g')
2667 mat[c2][c1]=mat[c1][c2];
// NOTE(review): x and y are plain char; where char is signed, a byte value
// above 127 becomes a negative index here - confirm inputs are ASCII.
2671 return (int)mat[(int)x][(int)y];
2676 /* ------------------------------------------------------------ */
// nussinov: computes a secondary structure for sequence S in parenthesis
// notation. THRESHOLD is the minimum distance between two paired residues.
// The result string paren has n characters, initialised to dots, and is filled
// by backtrack from the matrix built by computeBasePairMatrix.
// NOTE(review): n is assigned on a line not visible here (presumably the
// length of S) and the return is not visible (presumably paren, allocated with
// vcalloc; ownership is then with the caller).
2678 char *nussinov(char *S, int THRESHOLD)
2683 /*-------------------------------
2685 paren is parenthesis expression for
2686 optimal RNA secondary structure
2687 THRESHOLD: Min distance between two paired residues
2688 -------------------------------*/
2693 /*----- initialization --*/
2695 paren=vcalloc (n+1, sizeof (char));
2696 numBasePairs=declare_int (n,n);
2698 for (i=0;i<n;i++) paren[i]='.';
2699 paren[n]='\0'; // paren is string of same length as S
2700 computeBasePairMatrix(numBasePairs,S,n, THRESHOLD);
// NOTE(review): for an empty sequence this call receives j = -1; check that
// the non-visible part of backtrack tolerates it.
2701 backtrack(0,n-1,numBasePairs,S,paren, THRESHOLD);
2702 free_int (numBasePairs, -1);
// computeBasePairMatrix: fills the n x n matrix numBasePairs for sequence S.
// The upper triangle [i][j] receives the best score `max` found for the
// interval i..j; the lower triangle [j][i] receives either `index`, the partner
// of j, or -1 (see the existing comment below and the two final assignments).
// Intervals are processed by increasing width d, starting at THRESHOLD.
// NOTE(review): the assignment of j and its bound check are on lines not
// visible here (presumably j = i + d, guarded by j < n).
2706 static void computeBasePairMatrix(int** numBasePairs,char *S,int n, int THRESHOLD)
2708 int i,j,d,k,max,val,index;
2710 for (d = THRESHOLD; d < n; d++){
2711 for(i=0; i < n; i++)
2717 /*-------------------------------------
2718 if index<n at end of for-loop, then this
2719 means that index and j form a base pair,
2720 and this is noted by numBasePairs[j][i]=index.
2721 if index=n at end of for-loop, then this
2722 means that j is not base paired.
2723 -------------------------------------*/
// Case 1: j is left unpaired, so the score of i..j-1 is inherited.
2725 if ( numBasePairs[i][j-1]>max ){
2726 max = numBasePairs[i][j-1];
2728 // j not basepaired with some k such that i<k<j
// Case 2: i pairs with j.
// NOTE(review): with d starting at THRESHOLD, the test j-i<=THRESHOLD only
// holds for the narrowest intervals; a >= comparison may have been intended -
// confirm against the reference algorithm.
2731 val = basePair(S[i],S[j]) + numBasePairs[i+1][j-1];
2732 if ( j-i<= THRESHOLD && val > max ){
// Case 3: j pairs with some k, splitting the interval in two.
// NOTE(review): for k == i the term numBasePairs[i][k-1] reads column i-1,
// which is index -1 when i is 0, unless a non-visible line prevents it.
2736 for(k=i; k<=j-THRESHOLD; k++){
2737 val = basePair(S[k],S[j]) + numBasePairs[i][k-1]
2738 + numBasePairs[k+1][j-1];
2744 numBasePairs[i][j]=max;
2746 numBasePairs[j][i]=index;
2748 numBasePairs[j][i]=-1;
// backtrack: recursive trace-back over the interval i..j.
// k is read from the lower triangle numBasePairs[j][i], where
// computeBasePairMatrix stores the partner of j or -1. The visible recursion
// descends into (k+1, j-1) and (i, k-1) in one branch and into (i, j-1) in the
// other, each time only when the sub-interval is at least THRESHOLD wide.
// NOTE(review): the test on k, the writes into paren and the return value are
// on lines not visible here.
2758 static int backtrack(int i, int j, int **numBasePairs,char *S, char *paren, int THRESHOLD)
2762 k = numBasePairs[j][i];
2767 if( THRESHOLD <= (j-1)-(k+1) )
2768 backtrack(k+1,j-1,numBasePairs,S,paren, THRESHOLD);
2769 if (THRESHOLD <= k-1-i )
2770 backtrack(i,k-1,numBasePairs,S,paren, THRESHOLD);
2773 if( THRESHOLD <= j-1-i )
2775 backtrack(i,j-1,numBasePairs,S,paren, THRESHOLD);
// rna_struc2rna_lib: predicts a secondary structure for seq with
// nussinov (threshold 2) and writes it as a one-sequence library file in
// TC_LIB_FORMAT_01: header, sequence record, the section line for sequence 1
// against itself, one line per pair emitted by display_rna_ss, and the closing
// SEQ_1_TO_N marker.
// name is the output path; a temporary name is obtained from vtmpnam when it
// is NULL.
// NOTE(review): closing fp, releasing st and the return value are on lines not
// visible here (presumably name is returned).
2783 char * rna_struc2rna_lib ( char *seq_name, char *seq, char *name)
2789 st=nussinov (seq, 2);
2790 if ( name==NULL)name=vtmpnam(NULL);
2791 fp=vfopen ( name, "w");
2792 fprintf (fp, "! TC_LIB_FORMAT_01\n");
2793 fprintf (fp, "1\n%s %d %s\n", seq_name, (int)strlen (seq), seq);
2794 fprintf (fp, "#1 1\n");
2795 display_rna_ss (0, seq, st, fp);
2796 fprintf ( fp, "! SEQ_1_TO_N\n");
2799 //printf_system ( "cp %s test", name);
// display_rna_ss: walks the structure string st from index n and writes paired
// positions to fp.
// In the visible branch the current position is saved as a 1-based number in
// `string`, the function recurses from n+1, and on return a line with the saved
// position, the 1-based position reached by the recursion and the constant
// weight 100 is printed.
// NOTE(review): the tests on p that select this branch, the handling of the
// other characters and the return value are on lines not visible here
// (presumably opening and closing parentheses drive the recursion).
2802 int display_rna_ss ( int n, char *seq, char *st, FILE *fp)
2808 while ((p=st[n])!='\0')
2813 sprintf (string, "%d",n+1);
2814 n=display_rna_ss (n+1, seq, st, fp);
2815 fprintf (fp, "%s %d 100\n", string, n+1);
2826 /******************************COPYRIGHT NOTICE*******************************/
2827 /*© Centro de Regulacio Genomica */
2829 /*Cedric Notredame */
2830 /*Fri Feb 18 08:27:45 CET 2011 - Revision 596. */
2831 /*All rights reserved.*/
2832 /*This file is part of T-COFFEE.*/
2834 /* T-COFFEE is free software; you can redistribute it and/or modify*/
2835 /* it under the terms of the GNU General Public License as published by*/
2836 /* the Free Software Foundation; either version 2 of the License, or*/
2837 /* (at your option) any later version.*/
2839 /* T-COFFEE is distributed in the hope that it will be useful,*/
2840 /* but WITHOUT ANY WARRANTY; without even the implied warranty of*/
2841 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/
2842 /* GNU General Public License for more details.*/
2844 /* You should have received a copy of the GNU General Public License*/
2845 /* along with T-COFFEE; if not, write to the Free Software*/
2846 /* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/
2847 /*............................................... |*/
2848 /* If you need some more information*/
2849 /* cedric.notredame@europe.com*/
2850 /*............................................... |*/
2854 /******************************COPYRIGHT NOTICE*******************************/