8 #include "io_lib_header.h"
9 #include "util_lib_header.h"
10 #include "define_header.h"
11 #include "dp_lib_header.h"
12 void print_atom ( Atom*A);
14 float **** quantile_apdb_filtration ( Alignment *A, float ****residues, Constraint_list *CL,Pdb_param *PP, FILE *fp);
15 float **** irmsdmin_apdb_filtration ( Alignment *A, float ****residues, Constraint_list *CL,Pdb_param *PP, FILE *fp);
16 int apdb ( int argc, char *argv[])
19 Constraint_list *CL=NULL;
31 /*PARAMETERS VARIABLES*/
54 float maximum_distance;
55 float similarity_threshold;
67 char **out_aln_format;
77 char **template_file_list;
88 char *prot_blast_server;
89 char *pdb_blast_server;
96 argv=standard_initialisation (argv, &argc);
98 /*PARAMETER PROTOTYPE: READ PARAMETER FILE */
99 declare_name (parameters);
104 /*Name*/ "-parameters" ,\
107 /*OPTIONAL?*/ OPTIONAL ,\
109 /*DOC*/ "Read the files in the parameter file" ,\
110 /*Parameter*/ ¶meters ,\
113 /*Min_value*/ "any" ,\
114 /*Max Value*/ "any" \
116 if ( parameters && parameters[0])
118 argv[argc]=vcalloc ( VERY_LONG_STRING, sizeof(char));
120 fp_parameters=vfopen (parameters, "r");
121 while ((c=fgetc (fp_parameters))!=EOF)argv[1][a++]=c;
122 vfclose (fp_parameters);
125 argv=break_list ( argv, &argc, "=:;, \n");
127 /*PARAMETER PROTOTYPE*/
128 declare_name (se_name);
136 /*OPTIONAL?*/ OPTIONAL ,\
139 /*Parameter*/ &se_name ,\
140 /*Def 1*/ "stderr" ,\
141 /*Def 2*/ "/dev/null" ,\
142 /*Min_value*/ "any" ,\
143 /*Max Value*/ "any" \
146 le=vfopen ( se_name, "w");
147 fprintf ( le, "\nPROGRAM: %s\n",argv[0]);
149 /*PARAMETER PROTOTYPE: IN */
150 list_file=declare_char ( 200, STRING);
151 n_list=get_cl_param(\
158 /*OPTIONAL?*/ OPTIONAL ,\
161 /*Parameter*/ list_file ,\
164 /*Min_value*/ "any" ,\
165 /*Max Value*/ "any" \
167 /*PARAMETER PROTOTYPE: IN */
168 struc_to_use=declare_char ( 200, STRING);
169 n_struc_to_use=get_cl_param(\
173 /*Name*/ "-struc_to_use" ,\
176 /*OPTIONAL?*/ OPTIONAL ,\
179 /*Parameter*/ struc_to_use ,\
182 /*Min_value*/ "any" ,\
183 /*Max Value*/ "any" \
186 /*PARAMETER PROTOTYPE: COMPARISON IO */
187 declare_name (comparison_io);
192 /*Name*/ "-io_format" ,\
195 /*OPTIONAL?*/ OPTIONAL ,\
198 /*Parameter*/ &comparison_io,\
199 /*Def 1*/ "hsgd0123456",\
201 /*Min_value*/ "any" ,\
202 /*Max Value*/ "any" \
204 /*PARAMETER PROTOTYPE: ALN */
213 /*OPTIONAL?*/ OPTIONAL ,\
219 /*Min_value*/ "any" ,\
220 /*Max Value*/ "any" \
222 /*PARAMETER PROTOTYPE: ALN */
224 declare_name (repeat_seq);
229 /*Name*/ "-repeat_seq" ,\
232 /*OPTIONAL?*/ OPTIONAL ,\
235 /*Parameter*/ &repeat_seq,\
238 /*Min_value*/ "any" ,\
239 /*Max Value*/ "any" \
242 /*PARAMETER PROTOTYPE: ALN */
243 declare_name (repeat_pdb);
248 /*Name*/ "-repeat_pdb" ,\
251 /*OPTIONAL?*/ OPTIONAL ,\
254 /*Parameter*/ &repeat_pdb,\
257 /*Min_value*/ "any" ,\
258 /*Max Value*/ "any" \
261 /*PARAMETER PROTOTYPE: Nb to exclude */
266 /*Name*/ "-n_excluded_nb" ,\
269 /*OPTIONAL?*/ OPTIONAL ,\
271 /*DOC*/ "Exclude the N Nb on each side of the central residue. -1 triggers an automatic setting equal to the window size corresponding to the sphere" ,\
272 /*Parameter*/ &n_excluded_nb ,\
275 /*Min_value*/ "any" ,\
276 /*Max Value*/ "any" \
278 /*PARAMETER PROTOTYPE: diatances to count */
283 /*Name*/ "-similarity_threshold" ,\
286 /*OPTIONAL?*/ OPTIONAL ,\
289 /*Parameter*/ &similarity_threshold,\
292 /*Min_value*/ "any" ,\
293 /*Max Value*/ "any" \
295 /*PARAMETER PROTOTYPE: diatances to count */
300 /*Name*/ "-filter" ,\
303 /*OPTIONAL?*/ OPTIONAL ,\
305 /*DOC*/ "Filter by only keeping the best quantile" ,\
306 /*Parameter*/ &filter,\
309 /*Min_value*/ "-1.00" ,\
310 /*Max Value*/ "1.00" \
312 /*PARAMETER PROTOTYPE: diatances to count */
317 /*Name*/ "-filter_aln" ,\
320 /*OPTIONAL?*/ OPTIONAL ,\
322 /*DOC*/ "Lower Case For Residues Filtered Out" ,\
323 /*Parameter*/ &filter_aln,\
329 /*PARAMETER PROTOTYPE: diatances to count */
334 /*Name*/ "-irmsd_graph" ,\
337 /*OPTIONAL?*/ OPTIONAL ,\
339 /*DOC*/ "Outputs the irmsd, position/position" ,\
340 /*Parameter*/ &irmsd_graph,\
346 /*PARAMETER PROTOTYPE: diatances to count */
351 /*Name*/ "-nirmsd_graph" ,\
354 /*OPTIONAL?*/ OPTIONAL ,\
356 /*DOC*/ "Outputs the NIRMSD VS N Removed Residues Curve" ,\
357 /*Parameter*/ &nirmsd_graph,\
363 /*PARAMETER PROTOTYPE: -rmsd_threshold */
368 /*Name*/ "-md_threshold" ,\
371 /*OPTIONAL?*/ OPTIONAL ,\
374 /*Parameter*/ &md_threshold ,\
377 /*Min_value*/ "any" ,\
378 /*Max Value*/ "any" \
381 /*PARAMETER PROTOTYPE: -maximum distances */
386 /*Name*/ "-maximum_distance" ,\
389 /*OPTIONAL?*/ OPTIONAL ,\
392 /*Parameter*/ &maximum_distance ,\
395 /*Min_value*/ "any" ,\
396 /*Max Value*/ "any" \
400 /*PARAMETER PROTOTYPE: -print_rapdb */
405 /*Name*/ "-print_rapdb" ,\
408 /*OPTIONAL?*/ OPTIONAL ,\
410 /*DOC*/ "Prints the neighborhood of each pair of aligned residues, along with the associated local score" ,\
411 /*Parameter*/ &print_rapdb ,\
414 /*Min_value*/ "any" ,\
415 /*Max Value*/ "any" \
418 /*PARAMETER PROTOTYPE: RUN_NAME */
419 declare_name (run_name);
424 /*Name*/ "-run_name" ,\
427 /*OPTIONAL?*/ OPTIONAL ,\
430 /*Parameter*/ &run_name ,\
431 /*Def 1*/ "default" ,\
433 /*Min_value*/ "default" ,\
434 /*Max Value*/ "any" \
436 /*PARAMETER PROTOTYPE: OUTFILE */
437 /*PARAMETER PROTOTYPE: OUTFILE */
438 declare_name ( outfile);
443 /*Name*/ "-outfile" ,\
446 /*OPTIONAL?*/ OPTIONAL ,\
449 /*Parameter*/ &outfile ,\
451 /*Def 2*/ "default" ,\
452 /*Min_value*/ "default" ,\
453 /*Max Value*/ "any" \
455 /*PARAMETER PROTOTYPE: OUTFILE */
456 declare_name ( apdb_outfile);
461 /*Name*/ "-apdb_outfile" ,\
464 /*OPTIONAL?*/ OPTIONAL ,\
467 /*Parameter*/ &apdb_outfile ,\
468 /*Def 1*/ "stdout" ,\
469 /*Def 2*/ "default" ,\
470 /*Min_value*/ "any" ,\
471 /*Max Value*/ "any" \
474 /*PARAMETER PROTOTYPE: OUTPUT_FORMAT */
475 out_aln_format=declare_char ( 200, STRING);
476 n_out_aln_format=get_cl_param(\
480 /*Name*/ "-output" ,\
483 /*OPTIONAL?*/ OPTIONAL ,\
486 /*Parameter*/ out_aln_format,\
487 /*Def 1*/ "score_html" ,\
489 /*Min_value*/ "any" ,\
490 /*Max Value*/ "any" \
495 /*PARAMETER PROTOTYPE: INFILE */
496 declare_name (color_mode);
501 /*Name*/ "-color_mode" ,\
504 /*OPTIONAL?*/ OPTIONAL ,\
507 /*Parameter*/ &color_mode ,\
510 /*Min_value*/ "any" ,\
511 /*Max Value*/ "any" \
513 /*PARAMETER PROTOTYPE: INFILE */
514 declare_name (output_res_num);
519 /*Name*/ "-seqnos" ,\
522 /*OPTIONAL?*/ OPTIONAL ,\
525 /*Parameter*/ &output_res_num ,\
528 /*Min_value*/ "any" ,\
529 /*Max Value*/ "any" \
531 declare_name (cache);
539 /*OPTIONAL?*/ OPTIONAL ,\
541 /*DOC*/ "use,ignore,update,local, directory name" ,\
542 /*Parameter*/ &cache ,\
544 /*Def 2*/ "update" ,\
545 /*Min_value*/ "any" ,\
546 /*Max Value*/ "any" \
549 declare_name (local_mode);
554 /*Name*/ "-local_mode" ,\
557 /*OPTIONAL?*/ OPTIONAL ,\
559 /*DOC*/ "Mode for choosing the Neighborhood (bubble or window)\nWhen selecting window, maximum distance becomes the window 1/2 size, in residues\nWhen using sphere, maximum_distance is the sphere radius in Angstrom" ,\
560 /*Parameter*/ &local_mode ,\
561 /*Def 1*/ "sphere" ,\
562 /*Def 2*/ "window" ,\
563 /*Min_value*/ "any" ,\
564 /*Max Value*/ "any" \
567 /*PARAMETER PROTOTYPE: IN */
568 template_file_list=declare_char (100, STRING);
569 n_template_file=get_cl_param( \
573 /*Name*/ "-template_file" ,\
576 /*OPTIONAL?*/ OPTIONAL ,\
578 /*DOC*/ "List of templates file for the sequences",\
579 /*Parameter*/ template_file_list , \
580 /*Def 1*/ "_SELF_P_",\
582 /*Min_value*/ "any" ,\
583 /*Max Value*/ "any" \
585 /*PARAMETER PROTOTYPE: MODE */
594 /*OPTIONAL?*/ OPTIONAL ,\
596 /*DOC*/ "Mode: irmsd, ",\
597 /*Parameter*/ &mode , \
600 /*Min_value*/ "any" ,\
601 /*Max Value*/ "any" \
610 /*Name*/ "-prot_min_sim" ,\
611 /*Flag*/ &prot_min_sim ,\
613 /*OPTIONAL?*/ OPTIONAL ,\
615 /*DOC*/ "Minimum similarity between a sequence and its PDB target" ,\
616 /*Parameter*/ &prot_min_sim ,\
619 /*Min_value*/ "any" ,\
620 /*Max Value*/ "any" \
622 set_int_variable ("prot_min_sim", prot_min_sim);
628 /*Name*/ "-prot_max_sim" ,\
629 /*Flag*/ &prot_max_sim ,\
631 /*OPTIONAL?*/ OPTIONAL ,\
633 /*DOC*/ "Maximum similarity between a sequence and its BLAST relatives" ,\
634 /*Parameter*/ &prot_max_sim ,\
637 /*Min_value*/ "any" ,\
638 /*Max Value*/ "any" \
640 set_int_variable ("prot_max_sim", prot_max_sim);
646 /*Name*/ "-prot_min_cov" ,\
647 /*Flag*/ &prot_min_cov ,\
649 /*OPTIONAL?*/ OPTIONAL ,\
651 /*DOC*/ "Minimum coverage of a sequence by its BLAST relatives" ,\
652 /*Parameter*/ &prot_min_cov ,\
655 /*Min_value*/ "any" ,\
656 /*Max Value*/ "any" \
658 set_int_variable ("prot_min_cov", prot_min_cov);
664 /*Name*/ "-pdb_min_sim" ,\
665 /*Flag*/ &pdb_min_sim ,\
667 /*OPTIONAL?*/ OPTIONAL ,\
669 /*DOC*/ "Minimum similarity between a sequence and its PDB target" ,\
670 /*Parameter*/ &pdb_min_sim ,\
673 /*Min_value*/ "any" ,\
674 /*Max Value*/ "any" \
677 set_int_variable ("pdb_min_sim", pdb_min_sim);
682 /*Name*/ "-pdb_max_sim" ,\
683 /*Flag*/ &pdb_max_sim ,\
685 /*OPTIONAL?*/ OPTIONAL ,\
687 /*DOC*/ "Maximum similarity between a sequence and its PDB target" ,\
688 /*Parameter*/ &pdb_max_sim ,\
691 /*Min_value*/ "any" ,\
692 /*Max Value*/ "any" \
694 set_int_variable ("pdb_max_sim", pdb_max_sim);
699 /*Name*/ "-pdb_min_cov" ,\
700 /*Flag*/ &pdb_min_cov ,\
702 /*OPTIONAL?*/ OPTIONAL ,\
704 /*DOC*/ "Minimum coverage of a sequence by its PDB target" ,\
705 /*Parameter*/ &pdb_min_cov ,\
708 /*Min_value*/ "any" ,\
709 /*Max Value*/ "any" \
711 set_int_variable ("pdb_min_cov", pdb_min_cov);
715 declare_name (pdb_blast_server);
720 /*Name*/ "-pdb_blast_server" ,\
723 /*OPTIONAL?*/ OPTIONAL ,\
726 /*Parameter*/&pdb_blast_server ,\
728 /*Def 2*/ "default" ,\
729 /*Min_value*/ "any" ,\
730 /*Max Value*/ "any" \
732 declare_name (prot_blast_server);
740 /*OPTIONAL?*/ OPTIONAL ,\
743 /*Parameter*/&prot_blast_server ,\
746 /*Min_value*/ "any" ,\
747 /*Max Value*/ "any" \
749 //make sure that -blast and -blast_server are both supported blast>blast_server
750 if ( !prot_blast_server[0])
756 /*Name*/ "-blast_server" ,\
759 /*OPTIONAL?*/ OPTIONAL ,\
762 /*Parameter*/&prot_blast_server ,\
764 /*Def 2*/ "default" ,\
765 /*Min_value*/ "any" ,\
766 /*Max Value*/ "any" \
769 set_string_variable ("blast_server", prot_blast_server);
773 declare_name (pdb_db);
778 /*Name*/ "-pdb_db" ,\
781 /*OPTIONAL?*/ OPTIONAL ,\
783 /*DOC*/ "Non Redundant PDB database" ,\
784 /*Parameter*/&pdb_db ,\
786 /*Def 2*/ "default" ,\
787 /*Min_value*/ "any" ,\
788 /*Max Value*/ "any" \
790 set_string_variable ("pdb_db", pdb_db);
793 declare_name (prot_db);
798 /*Name*/ "-protein_db" ,\
801 /*OPTIONAL?*/ OPTIONAL ,\
804 /*Parameter*/&prot_db ,\
805 /*Def 1*/ "uniprot" ,\
806 /*Def 2*/ "default" ,\
807 /*Min_value*/ "any" ,\
808 /*Max Value*/ "any" \
810 // set the correct mode:
811 if ( strm (argv[0], "trmsd"))sprintf (mode, "trmsd");
813 set_string_variable ("prot_db", prot_db);
816 if (argc==1){myexit (EXIT_SUCCESS);}
818 if ( strm (outfile,"no"))n_out_aln_format=0;
820 get_cl_param( argc, argv,&le, NULL,NULL,NULL,0,0,NULL);
821 prepare_cache (cache);
825 sprintf ( aln, "%s", argv[1]);
829 printf_exit (EXIT_FAILURE, stderr, "\n\n---- ERROR: File %s must be a valid alignment [FATAL:%s-%s]\n\n",aln,argv[0], PROGRAM);
832 pdb_param=vcalloc ( 1, sizeof(Pdb_param));
834 pdb_param->similarity_threshold=similarity_threshold;
836 pdb_param->md_threshold=md_threshold;
837 pdb_param->maximum_distance=maximum_distance;
839 if ( n_excluded_nb>0)
840 pdb_param->n_excluded_nb=n_excluded_nb;
841 else if ( n_excluded_nb==-1)
842 pdb_param->n_excluded_nb=(int)((float)maximum_distance/(float)1.57);
843 /* Exclude all the nb within the bubble at +1, +2, +n*/
844 pdb_param->print_rapdb=print_rapdb;
845 pdb_param->comparison_io=comparison_io;
847 pdb_param->local_mode=local_mode;
848 pdb_param->color_mode=lower_string (color_mode);
849 pdb_param->filter=filter;
850 pdb_param->filter_aln=filter_aln;
851 pdb_param->irmsd_graph=irmsd_graph;
852 pdb_param->nirmsd_graph=nirmsd_graph;
854 sprintf ( list_file[n_list++], "S%s", aln);
857 if (!strm (repeat_seq, ""))
860 sprintf ( template_file_list[0], "%s", process_repeat (list_file[0], repeat_seq, repeat_pdb));
861 fprintf ( le, "\n##Turn a repeat List into a Template File\n");
862 le=display_file_content (le,template_file_list[0]);
863 fprintf ( le, "\n\n");
865 S=read_seq_in_n_list (list_file, n_list, NULL, NULL);
867 le=display_sequences_names ( S,le,0, 0);
869 if ( n_template_file)
871 fprintf ( le, "\nLooking For Sequence Templates:\n");
872 for ( a=0; a< n_template_file; a++)
874 fprintf ( le, "\n\tTemplate Type: [%s] Mode Or File: [%s] [Start", template_type2type_name(template_file_list[a]), template_file_list[a]);
875 S=seq2template_seq(S, template_file_list[a], F);
880 if ( !strm (run_name, "default"))
882 F=parse_fname(run_name);
883 sprintf (F->name, "%s", F->full);
890 for ( a=0; a< S->nseq; a++)
894 p=seq2T_value (S, a, "template_file", "_P_");
896 if (p)sprintf (S->file[a], "%s",p);
899 CL=declare_constraint_list ( S,NULL, NULL, 0,NULL, NULL);
900 CL->T=vcalloc (S->nseq,sizeof (Ca_trace*));
903 for ( n_pdb=0,a=0; a<S->nseq; a++)
905 if ( !is_pdb_file ( S->file[a])){CL->T[a]=NULL;continue;}
906 CL->T[a]=read_ca_trace (S->file[a], "ATOM");
907 CL->T[a]=trim_ca_trace (CL->T[a], S->seq[a]);
908 (CL->T[a])->pdb_param=pdb_param;
915 A->residue_case=KEEP_CASE;
916 A=main_read_aln(aln, A);
920 if ( strm (apdb_outfile, "default"))
921 sprintf ( apdb_outfile, "%s.apdb_result", F->name);
926 fp=vfopen (apdb_outfile, "w");
927 fprintf (fp, "\nYour Alignment Does Not Contain Enough Sequences With a known Structure\n");
928 fprintf (fp, "To Use APDB, your alignment must include at least TWO sequences with a known structure.\n");
929 fprintf (fp, "These sequences must be named according to their PDB identifier, followed by the chain index (if any) ex: 1fnkA\n");
930 fprintf (fp, "[FATAL:%s]\n", PROGRAM);
933 else if ( strm (mode, "irmsd"))
935 EA=analyse_pdb ( A, EA, apdb_outfile);
937 else if ( strm (mode, "msa2tree") || strm (mode, "trmsd"))
939 EA=msa2struc_dist ( A, EA,F->name);
941 le=display_output_filename ( le, "APDB_RESULT", "APDB_RESULT_FORMAT_01", apdb_outfile, CHECK);
945 declare_name (file_name);
946 for ( a=0; a< n_out_aln_format; a++)
948 if ( strm2( outfile, "stdout", "stderr"))sprintf (file_name, "%s", outfile);
949 else if ( strm (outfile, "default"))
950 sprintf (file_name, "%s.%s",F->name, out_aln_format[a]);
952 sprintf (file_name, "%s.%s",outfile,out_aln_format[a]);
954 output_format_aln (out_aln_format[a],A,EA,file_name);
955 le=display_output_filename ( le, "MSA", out_aln_format[a], file_name, CHECK);
963 Constraint_list * set_constraint_list4align_pdb (Constraint_list *CL,int seq, char *dp_mode, char *local_mode, char *param_file)
965 static Constraint_list *PWCL;
966 static Pdb_param *pdb_param;
972 free_constraint_list (PWCL);
977 PWCL=declare_constraint_list ( CL->S,NULL, NULL, 0,NULL, NULL);
979 pdb_param=vcalloc ( 1, sizeof(Pdb_param));
981 pdb_param->max_delta=2.0;
982 pdb_param->maximum_distance=14;
983 declare_name (pdb_param->local_mode);
984 sprintf (pdb_param->local_mode, "%s", local_mode);
987 PWCL->pw_parameters_set=1;
989 PWCL->lalign_n_top=10;
990 PWCL->sw_min_dist=10;
992 PWCL->T=vcalloc ( (PWCL->S)->nseq, sizeof (Ca_trace*));
999 sprintf (CL->matrix_for_aa_group, "vasiliky");
1000 PWCL->use_fragments=0;
1006 if ( param_file && check_file_exists ( param_file) )
1008 if ( (x=get_parameter ( "-nca", &n, param_file))!=NULL){pdb_param->N_ca=atoi(x[0]);free_char (x, -1);}
1009 if ( (x=get_parameter ( "-max_delta", &n, param_file))!=NULL){pdb_param->max_delta=atof(x[0]);free_char (x, -1);}
1010 if ( (x=get_parameter ( "-maximum_distance", &n, param_file))!=NULL){pdb_param->maximum_distance=atoi(x[0]);free_char (x, -1);}
1011 if ( (x=get_parameter ( "-local_mode", &n, param_file))!=NULL){sprintf (pdb_param->local_mode, "%s",x[0]);free_char (x, -1);}
1012 if ( (x=get_parameter ( "-scale", &n, param_file))!=NULL){pdb_param->scale=atoi(x[0]);free_char (x, -1);}
1013 if ( (x=get_parameter ( "-gapopen", &n, param_file))!=NULL){PWCL->gop=atoi(x[0]);free_char (x, -1);}
1014 if ( (x=get_parameter ( "-gapext" , &n, param_file))!=NULL){PWCL->gep=atof(x[0]);free_char (x, -1);}
1021 sprintf ( PWCL->dp_mode, "%s", dp_mode);
1023 if (strm (PWCL->dp_mode, "lalign"))sprintf (PWCL->dp_mode,"sim_pair_wise_lalign");
1024 else if (strm (PWCL->dp_mode, "sw"))sprintf (PWCL->dp_mode,"gotoh_pair_wise_sw");
1026 local_mode=pdb_param->local_mode;
1027 if ( strm ( local_mode, "hasch_ca_trace_nb")) PWCL->evaluate_residue_pair=evaluate_ca_trace_nb;
1028 else if ( strm ( local_mode, "hasch_ca_trace_bubble")) PWCL->evaluate_residue_pair=evaluate_ca_trace_bubble;
1029 else if ( strm ( local_mode, "hasch_ca_trace_sap1_bubble")) PWCL->evaluate_residue_pair=evaluate_ca_trace_sap1_bubble;
1030 else if ( strm ( local_mode, "hasch_ca_trace_sap2_bubble")) PWCL->evaluate_residue_pair=evaluate_ca_trace_sap2_bubble;
1032 else if ( strm ( local_mode, "hasch_ca_trace_transversal")) PWCL->evaluate_residue_pair=evaluate_ca_trace_transversal;
1033 else if ( strm ( local_mode, "hasch_ca_trace_bubble_2")) PWCL->evaluate_residue_pair=evaluate_ca_trace_bubble_2;
1034 else if ( strm ( local_mode, "hasch_ca_trace_bubble_3")) PWCL->evaluate_residue_pair=evaluate_ca_trace_bubble_3;
1035 else if ( strm ( local_mode, "custom_pair_score_function1")) PWCL->evaluate_residue_pair=custom_pair_score_function1;
1036 else if ( strm ( local_mode, "custom_pair_score_function2")) PWCL->evaluate_residue_pair=custom_pair_score_function2;
1037 else if ( strm ( local_mode, "custom_pair_score_function3")) PWCL->evaluate_residue_pair=custom_pair_score_function3;
1038 else if ( strm ( local_mode, "custom_pair_score_function4")) PWCL->evaluate_residue_pair=custom_pair_score_function4;
1039 else if ( strm ( local_mode, "custom_pair_score_function5")) PWCL->evaluate_residue_pair=custom_pair_score_function5;
1040 else if ( strm ( local_mode, "custom_pair_score_function6")) PWCL->evaluate_residue_pair=custom_pair_score_function6;
1041 else if ( strm ( local_mode, "custom_pair_score_function7")) PWCL->evaluate_residue_pair=custom_pair_score_function7;
1042 else if ( strm ( local_mode, "custom_pair_score_function8")) PWCL->evaluate_residue_pair=custom_pair_score_function8;
1043 else if ( strm ( local_mode, "custom_pair_score_function9")) PWCL->evaluate_residue_pair=custom_pair_score_function9;
1044 else if ( strm ( local_mode, "custom_pair_score_function10")) PWCL->evaluate_residue_pair=custom_pair_score_function10;
1049 fprintf ( stderr, "\n%s is an unknown hasch mode, [FATAL]\n", local_mode);
1050 myexit (EXIT_FAILURE);
1056 PWCL->T[seq]=read_ca_trace (is_pdb_struc((CL->S)->name[seq]), "ATOM");
1057 (PWCL->T[seq])->pdb_param=pdb_param;
1058 PWCL->T[seq]=trim_ca_trace (PWCL->T[seq], (CL->S)->seq[seq]);
1059 PWCL->T[seq]=hasch_ca_trace(PWCL->T[seq]);
1069 int evaluate_ca_trace_nb (Constraint_list *CL, int s1, int r1, int s2, int r2)
1072 return (int)(neighborhood_match(CL, s1,r1, s2, r2, (CL->T[s1])->Chain,(CL->T[s2])->Chain ));
1074 int evaluate_ca_trace_sap2_bubble (Constraint_list *CL, int s1, int r1, int s2, int r2)
1079 return sap2_neighborhood_match (CL, s1, r1, s2, r2, (CL->T[s1])->Bubble,(CL->T[s2])->Bubble );
1082 int evaluate_ca_trace_sap1_bubble (Constraint_list *CL, int s1, int r1, int s2, int r2)
1085 Function documentation: start
1087 int evaluate_ca_trace_sap1_bubble (Constraint_list *CL, int s1, int s2, int r1, int r2)
1088 This function evaluates the cost for matching two residues:
1090 a1 is the cost for matching the two neighborood ( bubble type), using sap
1091 a1: [0,+100], +100 is the best possible match.
1092 a2 is the residue type weight:
1093 min=worst substitution value
1094 best=best of r1/r1, r2/r2-min
1096 a2=(r1/r2 -min)/best --> a1:[0, 100]
1098 score=a1*a2-->[-inf, +10000];
1106 a1=(int) sap1_neighborhood_match (CL, s1, r1, s2, r2, (CL->T[s1])->Bubble,(CL->T[s2])->Bubble );
1112 int evaluate_ca_trace_bubble (Constraint_list *CL, int s1, int r1, int s2, int r2)
1115 Function documentation: start
1117 int evaluate_ca_trace_bubble (Constraint_list *CL, int s1, int s2, int r1, int r2)
1118 This function evaluates the cost for matching two residues:
1120 a1 is the cost for matching the two neighborood ( bubble type)
1121 a1: [-inf,+100-scale], +100-scale is the best possible match.
1131 a1=(int) neighborhood_match (CL, s1, r1, s2, r2, (CL->T[s1])->Bubble,(CL->T[s2])->Bubble )-((CL->T[s1])->pdb_param)->scale;
1137 int evaluate_ca_trace_transversal (Constraint_list *CL, int s1, int r1, int s2, int r2)
1139 return (int)(transversal_match (CL, s1, r1, s2, r2, (CL->T[s1])->Transversal,(CL->T[s2])->Transversal ));
1142 int evaluate_ca_trace_bubble_3 (Constraint_list *CL, int s1, int r1, int s2, int r2)
1144 /*This Mode evaluates :
1147 2-The Match of the transversal residues
1154 l1=MAX(( (CL->T[s1])->Chain )->nb[r1][0] ,((CL->T[s2])->Chain )->nb[r2][0]);
1155 l2=MAX(( (CL->T[s1])->Bubble)->nb[r1][0], ((CL->T[s2])->Bubble)->nb[r2][0]);
1157 a1=(int)(neighborhood_match (CL, s1, r1, s2, r2, (CL->T[s1])->Bubble,(CL->T[s2])->Bubble ));
1158 a2=(int)(transversal_match (CL, s1, r1, s2, r2, (CL->T[s1])->Transversal,(CL->T[s2])->Transversal ));
1160 if ( !l1 && !l2)return 0;
1164 int evaluate_ca_trace_bubble_2 (Constraint_list *CL, int s1, int r1, int s2, int r2)
1166 /*This Mode evaluates :
1167 1-The Ca neighborhood
1172 return (int)((neighborhood_match (CL, s1, r1, s2, r2, (CL->T[s1])->Chain,(CL->T[s2])->Chain )));
1176 /*********************************************************************************************/
1178 /* FUNCTIONS FOR COMPARING TWO NEIGHBORHOODS:START */
1180 /*********************************************************************************************/
1181 float matrix_match (Constraint_list *CL, int s1, int r1, int s2, int r2, Struct_nb *nbs1, Struct_nb *nbs2)
1185 Function documentation: start
1187 float matrix_match (Constraint_list *CL, int s1, int s2, int r1, int r2, Struct_nb *nbs1, Struct_nb *nbs2)
1188 This function evaluates the matrix for matching two residues:
1190 min=worst substitution value
1191 best=best of r1/r1, r2/r2-min
1193 a2=(r1/r2 -min)/best --> a1:[0, 100]
1195 score=a1*a2-->[-inf, +10000];
1207 CL->M=read_matrice ( "pam250mt");
1209 for ( a=0; a< 26; a++)
1210 for ( b=0; b< 26; b++)min=MIN(min, CL->M[a][b]);
1213 if ( r1<=0 || r2<=0)return 0;
1214 m1=CL->M[(CL->S)->seq[s1][r1-1]-'A'][(CL->S)->seq[s1][r1-1]-'A']-min;
1215 m2=CL->M[(CL->S)->seq[s2][r2-1]-'A'][(CL->S)->seq[s2][r2-1]-'A']-min;
1217 a2=(CL->M[(CL->S)->seq[s1][r1-1]-'A'][(CL->S)->seq[s2][r2-1]-'A']-min)/m;
1223 float transversal_match (Constraint_list *CL, int s1, int r1, int s2, int r2, Struct_nb *nbs1, Struct_nb *nbs2)
1227 float delta, max_delta;
1231 PP=(CL->T[s1])->pdb_param;
1232 max_delta=PP->max_delta;
1237 if ( l1!=l2 || l1<(PP->N_ca)) return 0;
1240 max=MAX(l1, l2)*max_delta;
1241 for ( delta=0,a=0; a< l2 ; a++)
1244 delta+=max_delta-FABS((nbs1->d_nb[r1][a]-nbs2->d_nb[r2][a]));
1246 score=(delta*100)/max;
1253 float neighborhood_match (Constraint_list *CL, int s1, int r1, int s2, int r2, Struct_nb *nbs1, Struct_nb *nbs2)
1255 static float **table;
1256 static int table_size;
1259 float ins, del, sub;
1260 float delta, max_delta;
1265 PP=(CL->T[s1])->pdb_param;
1266 max_delta=PP->max_delta;
1269 if ( r1> 0 && r2 >0) {r1--; r2--;}
1275 if (table_size< (MAX(l1, l2)+1))
1277 table_size=MAX(l1, l2)+1;
1278 if ( table)free_float (table, -1);
1281 if ( !table) table=declare_float (table_size, table_size);
1284 max=MAX(l1, l2)*max_delta;
1285 if ( max==0)return 0;
1289 for ( b=1; b<=l2; b++)
1293 for ( a=1; a<=l1; a++)
1296 for ( b=1; b<=l2 ; b++)
1299 delta=max_delta-FABS((nbs1->d_nb[r1][a]-nbs2->d_nb[r2][b]));
1303 sub= table[a-1][b-1]+delta;
1305 if ( del >= ins && del >= sub)score=del;
1306 else if ( ins >= del && ins >= sub) score=ins;
1313 score=((((score)*100)/max));
1319 float sap1_neighborhood_match (Constraint_list *CL, int s1, int r1, int s2, int r2, Struct_nb *nbs1, Struct_nb *nbs2)
1322 Function documentation: start
1324 float sap1_neighborhood_match (Constraint_list *CL, int s1, int s2, int r1, int r2, Struct_nb *nbs1, Struct_nb *nbs2)
1325 This function is adapted from Taylor, Orengo, Protein Structure Alignment JMB 1989, (208)1-22
1326 It is the first function where
1327 score= A/(|dra-drb|+b)
1329 Function documentation: end
1332 static float **table;
1333 static int table_size;
1336 float ins, del, sub;
1348 if ( r1> 0 && r2 >0) {r1--; r2--;}
1354 if (table_size< (MAX(l1, l2)+1))
1356 table_size=MAX(l1, l2)+1;
1357 if ( table)free_float (table, -1);
1360 if ( !table) table=declare_float (table_size, table_size);
1363 max=MAX(l1, l2)*(A/B);
1364 if ( max==0)return 0;
1368 for ( b=1; b<=l2; b++)
1372 for ( a=1; a<=l1; a++)
1375 for ( b=1; b<=l2 ; b++)
1378 delta=A/(FABS((nbs1->d_nb[r1][a]-nbs2->d_nb[r2][b]))+B);
1382 sub= table[a-1][b-1]+delta;
1383 if ( del >= ins && del >= sub)score=del;
1384 else if ( ins >= del && ins >= sub) score=ins;
1391 score=((score*100))/(max);
1397 float sap2_neighborhood_match (Constraint_list *CL, int s1, int r1, int s2, int r2, Struct_nb *nbs1, Struct_nb *nbs2)
1400 Function documentation: start
1402 float sap1_neighborhood_match (Constraint_list *CL, int s1, int s2, int r1, int r2, Struct_nb *nbs1, Struct_nb *nbs2)
1403 This function is adapted from Taylor, Orengo, Protein Structure Alignment JMB 1989, (208)1-22
1404 It is the first function where
1405 score= A/(|dra-drb|+b)
1407 Function documentation: end
1410 static float **table;
1411 static int table_size;
1414 float ins, del, sub;
1420 static Atom *vX_1, *vY_1, *vZ_1;
1421 static Atom *vX_2, *vY_2, *vZ_2;
1422 static Atom *ca1, *ca2;
1431 if ( r1> 0 && r2 >0) {r1--; r2--;}
1434 /*Make up the referencial*/
1435 pep1=(CL->T[s1])->peptide_chain;
1436 pep2=(CL->T[s2])->peptide_chain;
1438 /*Get Referencial for CA1*/
1439 if ( (pep1[r1])->C)vX_1 =diff_atom(pep1[r1]->C,pep1[r1]->CA, vX_1);
1440 if ( (pep1[r1])->N)vY_1 =diff_atom(pep1[r1]->N,pep1[r1]->CA, vY_1);
1441 if ( (pep1[r1])->CB)vZ_1=diff_atom(pep1[r1]->CB,(pep1[r1])->CA,vZ_1);
1442 else vZ_1=add_atom (vX_1, vY_1, vZ_1);
1448 /*Get Referencial for CA2*/
1449 if ( (pep2[r2])->C)vX_2 =diff_atom((pep2[r2])->C,(pep2[r2])->CA, vX_2);
1450 if ( (pep2[r2])->N)vY_2 =diff_atom((pep2[r2])->N,(pep2[r2])->CA, vY_2);
1451 if ( (pep2[r2])->CB)vZ_2=diff_atom((pep2[r2])->CB,(pep2[r2])->CA, vZ_2);
1452 else vZ_2=add_atom (vX_2, vY_2, vZ_2);
1457 /*END OF GETTING REFERENCIAL*/
1462 fprintf (stdout,"\n\t*******");
1464 fprintf (stdout, "RESIDUE %d %c", r1, (CL->S)->seq[s1][r1]);
1465 if ( (pep1[r1])->CA)fprintf (stdout,"\n\tCA ");print_atom (pep1[r1]->CA );
1466 if ( (pep1[r1])->C)fprintf (stdout,"\n\tC ");print_atom (pep1[r1]->C );
1467 if ( (pep1[r1])->N)fprintf (stdout,"\n\tN ");print_atom (pep1[r1]->N );
1468 if ( (pep1[r1])->CB)fprintf (stdout,"\n\tCB ");print_atom (pep1[r1]->CB );
1469 fprintf (stdout,"\n\t*******");
1470 fprintf (stdout,"\n\tvX ");print_atom ( vX_1);
1471 fprintf (stdout,"\n\tvY ");print_atom ( vY_1);
1472 fprintf (stdout,"\n\tvZ ");print_atom ( vZ_1);
1474 ca1= copy_atom ((pep1[r1-1])->CA, ca1);
1475 ca1 =diff_atom(ca1,(pep1[r1])->CA, ca1);
1476 fprintf (stdout,"\n\tca ");print_atom ( ca1);
1477 fprintf ( stdout, "\n\tSQ1=%d ", (int)square_atom(ca1));
1478 ca1=reframe_atom(vX_1, vY_1, vZ_1, ca1, ca1);
1479 fprintf ( stdout, "\n\tSQ2=%d ", (int)square_atom(ca1));
1480 fprintf (stdout,"\n\tca ");print_atom ( ca1);
1481 fprintf (stdout,"\n\n");
1488 if (table_size< (MAX(l1, l2)+1))
1490 table_size=MAX(l1, l2)+1;
1491 if ( table)free_float (table, -1);
1494 if ( !table) table=declare_float (table_size, table_size);
1497 max=MAX(l1, l2)*(A/B);
1499 if ( max==0)return 0;
1503 for ( b=1; b<=l2; b++)
1508 for ( a=1; a<=l1; a++)
1510 ca1=copy_atom ((CL->T[s1])->structure[nbs1->nb[r1][a]], ca1);
1511 ca1=diff_atom(ca1,(pep1[r1])->CA, ca1);
1512 ca1=reframe_atom(vX_1, vY_1, vZ_1, ca1, ca1);
1515 for ( b=1; b<=l2 ; b++)
1517 ca2 =copy_atom((CL->T[s2])->structure[nbs2->nb[r2][b]], ca2);
1518 ca2 =diff_atom(ca2,(pep2[r2])->CA, ca2);
1519 ca2 =reframe_atom(vX_2, vY_2, vZ_2, ca2, ca2);
1521 ca2=diff_atom(ca2,ca1,ca2);
1522 val=square_atom (ca2);
1524 val=(float)sqrt ((double)val);
1531 sub= table[a-1][b-1]+delta;
1533 if ( del >= ins && del >= sub)score=del;
1534 else if ( ins >= del && ins >= sub) score=ins;
1541 score=(((score*100))/(max)-50);
1547 /*********************************************************************************************/
1551 /*********************************************************************************************/
1552 float **** irmsdmin_apdb_filtration ( Alignment *A, float ****residues, Constraint_list *CL, Pdb_param *PP, FILE *fp)
1554 int s1, s2, a,col1, n,n2=0, t,flag;
1556 float nirmsd, min_nirmsd,max_nirmsd,ref_sum, sum, sum2;
1557 float **normalized_len;
1559 normalized_len=declare_float (A->nseq+1, A->nseq+1);
1560 for (s1=0; s1<A->nseq; s1++)
1562 int l1, l2, r1, r2, p;
1563 for (s2=0; s2<A->nseq; s2++)
1565 for ( l1=l2=p=0; p< A->len_aln; p++)
1567 r1=A->seq_al[s1][p];
1568 r2=A->seq_al[s2][p];
1569 if (!is_gap(r1) && isupper(r1))l1++;
1570 if (!is_gap(r2) && isupper(r2))l2++;
1572 normalized_len[s1][s2]=MIN(l1,l2);
1576 pos=aln2pos_simple (A, A->nseq);
1577 for ( s1=0; s1< A->nseq; s1++)
1578 for ( s2=0; s2<A->nseq; s2++)
1580 if ( s1==s2) continue;
1581 else if (!(CL->T[A->order[s1][0]]) || !(CL->T[A->order[s2][0]]))continue;
1583 list=declare_int (A->len_aln, 2);
1585 for ( sum=0,n=0,col1=0; col1< A->len_aln; col1++)
1587 if ( islower (A->seq_al[s1][col1]) || islower ( A->seq_al[s2][col1]))continue;
1588 else if ( pos[s1][col1]<=0 || pos[s2][col1]<=0 ) continue;
1589 else if ( residues[s1][s2][pos[s1][col1]-1][0]==0)continue;
1591 list[n][0]=pos[s1][col1]-1;
1592 list[n][1]=(int)100000*residues[s1][s2][pos[s1][col1]-1][4];
1593 sum2+=residues[s1][s2][pos[s1][col1]-1][4];
1597 if (n==0)return residues;
1599 sort_int_inv (list, 2, 1,0, n-1);
1600 for (sum=0,a=0; a<n; a++)
1605 nirmsd=min_nirmsd=max_nirmsd=sum/(n*n);
1609 /*1 Find the maximum*/
1611 for (flag=0,a=0; a< n-1; a++)
1614 nirmsd=sum/((n-(a+1))*(n-(a+1)));
1615 if (nirmsd<max_nirmsd)flag=1;
1616 if ((nirmsd>=max_nirmsd) && flag==1)break;
1621 for (a=0; a<n2-1; a++)
1624 nirmsd=sum/((n-(a+1))*(n-(a+1)));
1627 if ( nirmsd<min_nirmsd)
1631 if ( PP->nirmsd_graph)
1633 fprintf ( stdout, "\n_NIRMSD_GRAPH %s %s POS: %4d Removed: %4d NiRMSD: %.2f", A->name[s1], A->name[s2], list[a][0],a,(nirmsd/100000)*normalized_len[s1][s2]);
1638 if ( PP->print_rapdb)
1640 for ( a=0; a<n; a++)
1642 if ( list[a][1]>0 && a<=t)fprintf ( stdout, "\nRAPDB QUANTILE REMOVE S1: %3d S2: %3d COL: %3d SCORE*100: %d", s1, s2, list[a][0], list[a][1]);
1643 else if ( list[a][1]>0 && a>t)fprintf ( stdout, "\nRAPDB QUANTILE KEEP S1: %3d S2: %3d COL: %3d SCORE*100: %d", s1, s2, list[a][0], list[a][1]);
1647 fprintf ( stdout, "\n# MINIMISATION FILTER ON: NiRMSD minimsation resulted in the removal of %d [out of %d] Columns On the alignment %s Vs %s\n", t, n, A->name[s1], A->name[s2]);
1648 for ( a=0; a<=t; a++)
1651 residues[s1][s2][list[a][0]][0]=0;
1652 residues[s1][s2][list[a][0]][1]=0;
1653 residues[s1][s2][list[a][0]][2]=0;
1654 residues[s1][s2][list[a][0]][3]=0;
1655 residues[s1][s2][list[a][0]][4]=-1;
1659 free_int (list, -1);
1661 free_float (normalized_len, -1);
1664 float **** quantile_apdb_filtration ( Alignment *A, float ****residues, Constraint_list *CL, Pdb_param *PP,FILE *fp)
1666 int s1, s2, a,col1, n, t;
1669 pos=aln2pos_simple (A, A->nseq);
1670 for ( s1=0; s1< A->nseq; s1++)
1671 for ( s2=0; s2<A->nseq; s2++)
1673 if ( s1==s2) continue;
1674 else if (!(CL->T[A->order[s1][0]]) || !(CL->T[A->order[s2][0]]))continue;
1676 list=declare_int (A->len_aln, 2);
1678 for ( n=0,col1=0; col1< A->len_aln; col1++)
1680 if ( islower (A->seq_al[s1][col1]) || islower ( A->seq_al[s2][col1]))continue;
1681 else if ( pos[s1][col1]<=0 || pos[s2][col1]<=0 ) continue;
1683 list[n][0]=pos[s1][col1]-1;
1684 list[n][1]=(int)100*residues[s1][s2][pos[s1][col1]-1][4];
1689 sort_int_inv (list, 2, 1,0, n-1);
1691 t=quantile_rank ( list,1, n,PP->filter);
1693 if ( PP->print_rapdb)
1695 for ( a=0; a<n; a++)
1697 if ( list[a][1]>0 && a<t)fprintf ( stdout, "\nRAPDB QUANTILE REMOVE S1: %3d S2: %3d COL: %3d SCORE*100: %d", s1, s2, list[a][0], list[a][1]);
1698 else if ( list[a][1]>0 && a>t)fprintf ( stdout, "\nRAPDB QUANTILE KEEP S1: %3d S2: %3d COL: %3d SCORE*100: %d", s1, s2, list[a][0], list[a][1]);
1702 for ( a=0; a<t; a++)
1705 residues[s1][s2][list[a][0]][0]=0;
1706 residues[s1][s2][list[a][0]][1]=0;
1707 residues[s1][s2][list[a][0]][2]=0;
1708 residues[s1][s2][list[a][0]][3]=0;
1709 residues[s1][s2][list[a][0]][4]=-1;
1713 free_int (list, -1);
1718 Alignment * analyse_pdb ( Alignment *A, Alignment *ST, char *results)
1720 int s1,s2,r1, r2,b, p;
1722 float **normalize_len;
1724 float pair_tot=0, pair_m1, pair_m2, pair_m3, pair_m4, pair_m5, pair_len=0;
1725 float seq_tot, seq_m1, seq_m2, seq_m3, seq_m4, seq_m5,seq_len;
1726 float msa_tot, msa_m1, msa_m2, msa_m3, msa_m4, msa_m5, msa_len;
1727 float iRMSD_unit, iRMSD_max, iRMSD_min;
1730 Constraint_list *CL;
1731 char *average_file, *pairwise_file, *total_file, *irmsd_file=0;
1732 FILE *fp, *average,*pairwise, *total, *irmsd_graph=0;
1735 fp =vfopen ( results, "w");
1736 pairwise=vfopen ((pairwise_file=vtmpnam (NULL)),"w");
1737 average =vfopen ((average_file =vtmpnam (NULL)),"w");
1738 total =vfopen ((total_file =vtmpnam (NULL)),"w");
1743 for ( s1=0; s1< (A->S)->nseq; s1++)
1744 if ( CL->T[s1]){PP=(CL->T[s1])->pdb_param;break;}
1746 if (PP->irmsd_graph)irmsd_graph =vfopen ((irmsd_file =vtmpnam (NULL)),"w");
1748 fprintf ( fp, "\nAPDB_RESULT_FORMAT_02\n");
1749 residues=analyse_pdb_residues ( A, A->CL,PP);
1750 if ( PP->filter>=0)residues=quantile_apdb_filtration (A, residues, A->CL,PP, fp);
1751 else if ( PP->filter<0)residues=irmsdmin_apdb_filtration (A, residues, A->CL,PP, fp);
1753 pos=aln2pos_simple (A, A->nseq);
1759 /*Compute the alignment length for normalization*/
1760 normalize_len=declare_float (A->nseq+1, A->nseq+1);
1761 for (s1=0; s1<A->nseq; s1++)
1764 for (s2=0; s2<A->nseq; s2++)
1766 for ( l1=l2=p=0; p< A->len_aln; p++)
1768 r1=A->seq_al[s1][p];
1769 r2=A->seq_al[s2][p];
1770 if (!is_gap(r1) && isupper(r1))l1++;
1771 if (!is_gap(r2) && isupper(r2))l2++;
1773 normalize_len[s1][s2]=MIN(l1,l2);
1777 msa_len=msa_tot=msa_m1=msa_m2=msa_m3=msa_m4=msa_m5=0;
1779 for ( s1=0; s1< A->nseq; s1++)
1781 if ( !(CL->T[A->order[s1][0]]))continue;
1782 seq_len=seq_tot=seq_m1=seq_m2=seq_m3=seq_m4=seq_m5=0;
1783 for ( s2=0; s2< A->nseq; s2++)
1785 if ( s1==s2)continue;
1786 if ( !(CL->T[A->order[s2][0]]))continue;
1787 pair_tot=pair_m1=pair_m2=pair_m3=pair_m4=pair_m5=0;
1788 for ( p=0; p< A->len_aln; p++)
1790 r1=A->seq_al[s1][p];
1791 r2=A->seq_al[s2][p];
1797 if (is_gap(r1) || is_gap(r2) || residues[s1][s2][b][0]==0)
1799 A->seq_al[s1][p]=tolower(r1);
1800 A->seq_al[s2][p]=tolower(r2);
1804 A->seq_al[s1][p]=toupper(r1);
1805 A->seq_al[s2][p]=toupper(r2);
1810 if ( PP->irmsd_graph && ( is_gap(r1) || is_gap(r2) || residues[s1][s2][b][0]==0))
1813 fprintf ( irmsd_graph, "\n_IRMSD_GRAPH %10s %10s ALN: %c%c iRMSD: -1.00", A->name[s1], A->name[s2],A->seq_al[s1][p], A->seq_al[s2][p]);
1816 if (is_gap(r1) || is_gap(r2) || residues[s1][s2][b][0]==0)continue;
1820 m2=(residues[s1][s2][b][2]*100)/residues[s1][s2][b][0];
1821 if (m2>PP->similarity_threshold){pair_m3++;}
1825 m4=residues[s1][s2][b][4];
1827 if ( PP->irmsd_graph )
1829 fprintf ( irmsd_graph, "\nIRMSD_GRAPH %10s %10s ALN: %c%c iRMSD: %.2f", A->name[s1], A->name[s2],A->seq_al[s1][p], A->seq_al[s2][p], m4);
1833 pair_len=normalize_len[s1][s2];
1837 fprintf ( pairwise, "\n\n#PAIRWISE: %s Vs %s",A->name[s1], A->name[s2]);
1838 fprintf ( pairwise, "\n\tPAIRWISE EVALUATED: %6.2f %% [%s Vs %s] ", (pair_len==0)?-1:(pair_tot*100)/pair_len,A->name[s1], A->name[s2]);
1839 fprintf ( pairwise, "\n\tPAIRWISE APDB: %6.2f %% [%s Vs %s] ", (pair_tot==0)?-1:(pair_m3*100)/pair_tot,A->name[s1], A->name[s2]);
1840 fprintf ( pairwise, "\n\tPAIRWISE iRMSD: %6.2f Angs [%s Vs %s]", (pair_tot==0)?-1:pair_m4/pair_tot,A->name[s1], A->name[s2]);
1841 fprintf ( pairwise, "\n\tPAIRWISE NiRMSD: %6.2f Angs [%s Vs %s] [%d pos]", (pair_tot==0)?-1:(pair_m4*pair_len)/(pair_tot*pair_tot), A->name[s1], A->name[s2], (int)pair_tot);
1842 fprintf ( pairwise, "\n\tRAPDB PAIRS PAIRWISE N_NONEMPTY_PAIRS %d N_MAXIMUM_PAIRS %d",(int) pair_tot, (int)pair_len);
1855 fprintf ( average, "\n\n#AVERAGE For Sequence %s", A->name[s1]);
1856 fprintf ( average, "\n\tAVERAGE EVALUATED: %6.2f %% [%s]", (seq_len==0)?-1:(seq_tot*100)/seq_len, A->name[s1]);
1857 fprintf ( average, "\n\tAVERAGE APDB: %6.2f %% [%s]", (seq_tot==0)?-1:(seq_m3*100)/seq_tot, A->name[s1]);
1858 fprintf ( average, "\n\tAVERAGE iRMSD: %6.2f Angs [%s]", (seq_tot==0)?-1:seq_m4/seq_tot, A->name[s1]);
1859 fprintf ( average, "\n\tAVERAGE NiRMS: %6.2f Angs [%s]", (seq_tot==0)?-1:(seq_m4*seq_len)/(seq_tot*seq_tot), A->name[s1]);
1860 if ( strm (PP->color_mode, "apdb"))ST->score_seq[s1]=(seq_tot==0)?-1:(seq_m3*100)/pair_tot;
1861 if (PP->print_rapdb)fprintf (average, "\n\tRAPDB PAIRS AVERAGE N_NONEMPTY_PAIRS %d N_MAXIMUM_PAIRS %d", (int)pair_tot, (int)pair_len);
1863 if ( strm (PP->color_mode, "irmsd"))ST->score_seq[s1]=(seq_tot==0)?-1:10*((seq_m4*pair_len)/(seq_tot*seq_tot));
1866 fprintf ( total, "\n\n#TOTAL for the Full MSA");
1867 fprintf ( total, "\n\tTOTAL EVALUATED: %6.2f %% ", (msa_len==0)?-1:(msa_tot*100)/msa_len);
1868 fprintf ( total, "\n\tTOTAL APDB: %6.2f %% ", (msa_tot==0)?-1:(msa_m3*100)/msa_tot);
1869 fprintf ( total, "\n\tTOTAL iRMSD: %6.2f Angs", (msa_tot==0)?-1:msa_m4/msa_tot);
1870 fprintf ( total, "\n\tTOTAL NiRMSD: %6.2f Angs", (msa_tot==0)?-1:(msa_m4*msa_len)/(msa_tot*msa_tot));
1871 if (PP->print_rapdb)fprintf (total, "\n\tRAPDB PAIRS TOTAL N_NONEMPTY_PAIRS: %d N_MAXIMUM_PAIRS %d", (int)msa_tot, (int)msa_len);
1873 if ( strm (PP->color_mode, "apdb")) ST->score_aln=ST->score=A->score_aln=A->score=(msa_tot==0)?-1:(msa_m3*100)/msa_tot;
1874 if ( strm (PP->color_mode, "irmsd"))ST->score_aln=ST->score=A->score_aln=A->score=(msa_tot==0)?-1:10*((msa_m4*msa_len)/(msa_tot*msa_tot));
1876 vfclose (average);vfclose (total); vfclose (pairwise);if (PP->irmsd_graph)vfclose (irmsd_graph);
1877 fp=display_file_content (fp, pairwise_file);
1878 fp=display_file_content (fp, average_file);
1879 fp=display_file_content (fp, total_file);
1880 if ( PP->irmsd_graph)fp=display_file_content (fp, irmsd_file);
1882 fprintf ( fp, "\n\n# EVALUATED: Fraction of Pairwise Columns Evaluated\n");
1883 fprintf ( fp, "# APDB: Fraction of Correct Columns according to APDB\n");
1884 fprintf ( fp, "# iRMDS: Average iRMSD over all evaluated columns\n");
1885 fprintf ( fp, "# NiRMDS: iRMSD*MIN(L1,L2)/Number Evaluated Columns\n");
1886 fprintf ( fp, "# Main Parameter: -maximum_distance %.2f Angstrom\n", PP->maximum_distance);
1888 fprintf ( fp, "# Undefined values are set to -1 and indicate LOW Alignment Quality\n");
1889 fp=print_program_information (fp, NULL);
1895 for (iRMSD_max=0,iRMSD_min=10000,s1=0; s1<A->nseq; s1++)
1896 for ( s2=0; s2< A->nseq; s2++)
1897 for (p=0; p<A->len_aln; p++)
1899 if ( residues[s1][s2][p][4]>0)
1901 iRMSD_max=MAX(iRMSD_max, residues[s1][s2][p][4]);
1902 iRMSD_min=MAX(iRMSD_min, residues[s1][s2][p][4]);
1906 iRMSD_unit=iRMSD_max/8;
1908 for (p=0; p< A->len_aln; p++)
1909 for ( s1=0; s1< A->nseq; s1++)
1912 for ( p=0; p< A->len_aln; p++)
1914 r1=A->seq_al[s1][p];
1916 if ( is_gap(r1) || !(CL->T[A->order[s1][0]]))
1917 ST->seq_al[s1][p]=NO_COLOR_RESIDUE;
1920 float tot_m2=0, tot_m4=0, v=0;
1923 for (s2=0; s2< A->nseq; s2++)
1925 r2=A->seq_al[s1][p];
1926 if ( s1==s2) continue;
1927 if (is_gap(r2) || !(CL->T[A->order[s1][0]]) || residues[s1][s2][b][0]==0)continue;
1929 seq_m2+=m2=(residues[s1][s2][b][2]*100)/residues[s1][s2][b][0];
1932 m4=residues[s1][s2][b][4];
1940 if (strm ( PP->color_mode, "apdb"))
1942 if (tot_m2==0)v=NO_COLOR_RESIDUE;
1943 else v=MIN((seq_m2/(10*tot_m2)),9);
1945 else if ( strm (PP->color_mode, "irmsd"))
1947 if ( tot_m4==0)v=NO_COLOR_RESIDUE;
1948 else v=(8-(int)((seq_m4/(iRMSD_unit*tot_m4))))+1;
1950 ST->seq_al[s1][p]=v;
1955 for ( p=0; p<A->len_aln; p++) ST->seq_al[A->nseq][p]=NO_COLOR_RESIDUE;
1958 ST->generic_comment=vcalloc ( 100, sizeof (int));
1959 if ( strm (PP->color_mode, "apdb"))
1961 sprintf ( ST->generic_comment, "# APDB Evaluation: Color Range Blue-[0 %% -- 100 %%]-Red\n# Sequence Score: APDB\n# Local Score: APDB\n\n");
1963 else if ( strm (PP->color_mode, "irmsd"))
1965 sprintf ( ST->generic_comment, "\n# iRMSD Evaluation:\n# Sequence score: NiRMSD (Angstrom*10)\n# Local Score: iRMSD, Blue-[%.2f Ang. -- 0.00 Ang.]-Red \n", iRMSD_max);
1968 fprintf ( fp, "\n");
1973 float **** analyse_pdb_residues ( Alignment *A, Constraint_list *CL, Pdb_param *pdb_param)
1977 int s1, s2, rs1, rs2;
1979 float ****distances;
1981 /*Distances[Nseq][len_aln][4]
1982 distances...[0]: Number of residues within the bubble
1983 distances...[1]: Absolute difference of distance of residues within Bubble
1984 distances...[2]: Number of residues within the bubble with Delta dist < md_threshold
1985 distances ..[3]: Sum of squared difference of distances
1986 distances ..[4]: iRMSD
1991 int real_res1_col1=0;
1997 float nrapdb, rapdb;
2001 print_rapdb=PP->print_rapdb;
2003 distances=declare_arrayN(4, sizeof (float), A->nseq, A->nseq, 0, 0);
2005 /*Pre-computation of the internal distances----> T[seq]->ca_dist[len][len]*/
2006 /*Can be avoided if distance_on_request set to 1 */
2008 for ( s1=0; s1< A->nseq; s1++)
2010 rs1=A->order[s1][0];
2011 if (CL->T[rs1] && !(CL->T[rs1])->ca_dist)(CL->T[rs1])->ca_dist=measure_ca_distances(CL->T[rs1]);
2012 for ( s2=0; s2< A->nseq; s2++)
2014 distances[s1][s2]=declare_float ( A->len_aln, 6);
2017 pos=aln2pos_simple (A, A->nseq);
2019 for ( s1=0; s1< A->nseq; s1++)
2020 for ( col1=0; col1< A->len_aln; col1++)
2021 for ( s2=0; s2<A->nseq; s2++)
2023 rs1=A->order[s1][0];
2024 rs2=A->order[s2][0];
2027 if ( s1==s2) continue;
2028 else if (!(CL->T[rs1]) || !(CL->T[rs2]))continue;
2029 else if ( islower (A->seq_al[s1][col1]) || islower ( A->seq_al[s2][col1]))continue;
2030 else if ( pos[s1][col1]<=0 || pos[s2][col1]<=0 ) continue;
2032 if ( print_rapdb && s2>s1)
2035 fprintf ( stdout, "RAPDB S1: %s S2: %s POS %d %d %c %d %c ", A->name[s1], A->name[s2], col1+1, pos[s1][col1],A->seq_al[s1][col1], pos[s2][col1],A->seq_al[s2][col1]);
2036 BA=copy_aln (A, BA);
2037 lower_string (BA->seq_al[s1]);
2038 lower_string (BA->seq_al[s2]);
2039 BA->seq_al[s1][col1]=toupper (BA->seq_al[s1][col1]);
2040 BA->seq_al[s2][col1]=toupper (BA->seq_al[s2][col1]);
2043 for ( col2=0; col2<A->len_aln; col2++)
2046 if (pos[s1][col2]<=0 || pos[s2][col2]<=0 )continue;
2047 else if ( FABS((pos[s1][col2]-pos[s1][col1]))<=PP->n_excluded_nb)continue;
2048 else if ( FABS((pos[s2][col2]-pos[s2][col1]))<=PP->n_excluded_nb)continue;
2049 else if ( islower (A->seq_al[s1][col2]) || islower ( A->seq_al[s2][col2]))continue;
2051 real_res1_col1=pos[s1][col1]-1;
2052 real_res1_col2=pos[s1][col2]-1;
2054 real_res2_col1=pos[s2][col1]-1;
2055 real_res2_col2=pos[s2][col2]-1;
2057 d1=(CL->T[rs1])->ca_dist[real_res1_col1][real_res1_col2];
2058 d2=(CL->T[rs2])->ca_dist[real_res2_col1][real_res2_col2];
2060 if ( d1==UNDEFINED || d2 == UNDEFINED) continue;
2064 if ( strm ( PP->local_mode, "sphere"))
2066 in_bubble= (d1<PP->maximum_distance && d2<PP->maximum_distance)?1:0; ;
2068 else if ( strm ( PP->local_mode, "window"))
2070 wd1=FABS((pos[s1][col2]-pos[s1][col1]));
2071 wd2=FABS((pos[s2][col2]-pos[s2][col1]));
2072 in_bubble= (wd1<PP->maximum_distance && wd2<PP->maximum_distance)?1:0; ;
2077 if ( print_rapdb && s2 >s1)
2079 fprintf ( stdout, "NB %d %d %c %d %c ", col2, pos[s1][col2], A->seq_al[s1][col2], pos[s2][col2], A->seq_al[s2][col2]);
2080 BA->seq_al[s1][col2]=toupper (BA->seq_al[s1][col2]);
2081 BA->seq_al[s2][col2]=toupper (BA->seq_al[s2][col2]);
2083 delta=FABS((d1-d2));
2084 if (delta<PP->md_threshold)
2085 distances[s1][s2][real_res1_col1][2]++;
2086 distances[s1][s2][real_res1_col1][1]+=delta;
2087 distances[s1][s2][real_res1_col1][0]++;
2088 distances[s1][s2][real_res1_col1][3]+=delta*delta;
2094 if ( nrapdb==0)distances[s1][s2][real_res1_col1][4]=-1;
2095 else distances[s1][s2][real_res1_col1][4]=(float)sqrt((double)(rapdb/nrapdb));
2097 if ( print_rapdb && s2>s1)
2101 fprintf ( stdout, "APDB: UNDEFINED\n");
2106 fprintf ( stdout, " APDB: %.2f ",(float)sqrt((double)(rapdb/nrapdb)));
2107 BA->residue_case=KEEP_CASE;unalign_residues (BA, s1, s2);
2108 fprintf ( stdout,"SEQ1: %s %s SEQ2: %s %s\n", BA->name[s1], BA->seq_al[s1], BA->name[s2], BA->seq_al[s2]);
2121 Alignment * msa2struc_dist ( Alignment *A, Alignment *ST, char *results)
2126 int s1, s2, rs1, rs2;
2128 float ****distances;
2134 /*Distances[Nseq][len_aln][4]
2135 distances...[0]: Number of residues within the bubble
2136 distances...[1]: Absolute difference of distance of residues within Bubble
2137 distances...[2]: Number of residues within the bubble with Delta dist < md_threshold
2138 distances ..[3]: Sum of squared difference of distances
2139 distances ..[4]: iRMSD
2141 Pdb_param *pdb_param;
2142 Constraint_list *CL;
2147 int real_res1_col1=0;
2153 float nrapdb, rapdb;
2155 NT_node *T0,*T1,*T2,*PT, *POS;
2156 NT_node BT0, BT10,BT50, BT100,RBT;
2157 char **pair_pos_list;
2159 int ntree=0, ntree2;
2165 char *struc_tree100;
2169 char *color_struc_tree;
2175 declare_name(tot_pos_list);
2176 sprintf ( tot_pos_list, "%s.tot_pos_list", results);
2178 declare_name(pos_list);
2179 sprintf ( pos_list, "%s.pos_list", results);
2181 declare_name(struc_tree0);
2182 sprintf ( struc_tree0, "%s.struc_tree_full",results);
2184 declare_name(struc_tree10);
2185 sprintf ( struc_tree10, "%s.struc_tree10",results);
2187 declare_name(struc_tree100);
2188 sprintf ( struc_tree100, "%s.struc_tree100",results);
2190 declare_name(struc_tree50);
2191 sprintf ( struc_tree50, "%s.struc_tree50",results);
2193 declare_name(color_struc_tree);
2194 sprintf ( color_struc_tree, "%s.struc_tree.html", results);
2196 pair_pos_list=declare_char (A->len_aln*A->len_aln+1, 100);
2197 T1=vcalloc (A->len_aln*A->len_aln+1, sizeof (NT_node));
2198 T2=vcalloc (A->len_aln+1, sizeof (NT_node));
2200 PT=vcalloc (A->len_aln*A->len_aln+1, sizeof (NT_node));
2201 POS=vcalloc (A->len_aln+1, sizeof (NT_node));
2205 //Check all sequences have a PDB structure
2206 for (a=0; a<A->nseq; a++)
2208 if ( ! seq2P_template_file(A->S,a))
2210 fprintf ( stderr, "\n--- ERROR: %s has no structural template. All sequence in the MSA must have a known structure [FATAL]\n", (A->name[a]));
2215 printf_exit (EXIT_FAILURE, stderr, "\n\n---- ERROR: All provided sequences must have a valid PDB identifier [FATAL:tRMSD-%s]\n\n", PROGRAM);
2217 for ( s1=0; s1< (A->S)->nseq; s1++)
2218 if ( CL->T[s1]){PP=(CL->T[s1])->pdb_param;break;}
2220 for ( s1=0; s1< A->nseq; s1++)
2222 rs1=A->order[s1][0];
2223 if (CL->T[rs1] && !(CL->T[rs1])->ca_dist)(CL->T[rs1])->ca_dist=measure_ca_distances(CL->T[rs1]);
2225 pos=aln2pos_simple (A, A->nseq);
2226 dm=declare_float (A->nseq, A->nseq);
2227 dm_int=declare_int (A->nseq, A->nseq);
2228 count=declare_int (A->nseq, A->nseq);
2229 PP->maximum_distance=10000;
2231 tl=vfopen (tot_pos_list, "w");
2232 for (ncol=0,ntree=0, col1=0; col1< A->len_aln; col1++)
2235 output_completion (stderr, col1, A->len_aln,1, "Sample Columns");
2236 for (cont=1,s1=0; s1<A->nseq; s1++)if ( is_gap (A->seq_al[s1][col1]))cont=0;//Stop if gap in column 1
2238 if ( cont==0)continue;
2240 for (s1=0; s1<A->nseq; s1++)for ( s2=0; s2<A->nseq; s2++){count[s1][s2]=0;dm[s1][s2]=0;}
2242 for ( ntree2=0,col2=0; col2<A->len_aln; col2++)
2244 for (s1=0; s1< A->nseq-1; s1++)
2246 for ( s2=s1+1; s2<A->nseq; s2++)
2248 rs1=A->order[s1][0];
2249 rs2=A->order[s2][0];
2252 if ( s1==s2){dm[s1][s2]=0;continue;}
2253 else if (!(CL->T[rs1]) || !(CL->T[rs2])){cont=0;}
2254 else if ( islower (A->seq_al[s1][col1]) || islower ( A->seq_al[s2][col1])){cont=0;}
2255 else if ( pos[s1][col1]<=0 || pos[s2][col1]<=0 ){cont=0;}
2256 if (pos[s1][col2]<=0 || pos[s2][col2]<=0 ){cont=0;}//stop if Gap in Column 2
2257 else if ( FABS((pos[s1][col2]-pos[s1][col1]))<=PP->n_excluded_nb){cont=0;}
2258 else if ( FABS((pos[s2][col2]-pos[s2][col1]))<=PP->n_excluded_nb){cont=0;}
2259 else if ( islower (A->seq_al[s1][col2]) || islower ( A->seq_al[s2][col2])){cont=0;}
2260 if ( cont==0){continue;}
2263 real_res1_col1=pos[s1][col1]-1;
2264 real_res1_col2=pos[s1][col2]-1;
2266 real_res2_col1=pos[s2][col1]-1;
2267 real_res2_col2=pos[s2][col2]-1;
2269 d1=(CL->T[rs1])->ca_dist[real_res1_col1][real_res1_col2];
2270 d2=(CL->T[rs2])->ca_dist[real_res2_col1][real_res2_col2];
2272 if ( d1==UNDEFINED || d2 == UNDEFINED) continue;
2274 if ( strm ( PP->local_mode, "sphere"))
2276 in_bubble= (d1<PP->maximum_distance && d2<PP->maximum_distance)?1:0; ;
2278 else if ( strm ( PP->local_mode, "window"))
2280 wd1=FABS((pos[s1][col2]-pos[s1][col1]));
2281 wd2=FABS((pos[s2][col2]-pos[s2][col1]));
2282 in_bubble= (wd1<PP->maximum_distance && wd2<PP->maximum_distance)?1:0; ;
2286 delta=FABS((d1-d2));
2287 //delta=delta*delta;
2288 dm[s1][s2]=dm[s2][s1]+=delta;
2298 for (tree=1,s1=0; s1<A->nseq-1; s1++)
2299 for (s2=s1+1; s2<A->nseq; s2++)
2301 if ( count [s1][s2])dm[s1][s2]=dm[s2][s1]=dm[s1][s2]/(float)count[s1][s2];
2306 if (s1==0 && s2==1)min=max=dm[s1][s2];
2307 min=MIN(dm[s1][s2], min);
2308 max=MAX(dm[s1][s2], max);
2310 if (!tree || min==-1)continue;
2311 for (s1=0; s1<A->nseq-1; s1++)
2312 for (s2=s1+1; s2<A->nseq; s2++)
2314 dm_int[s1][s2]=dm_int[s2][s1]=((dm[s1][s2])/(max))*100;
2316 POS[col1]=T1[ntree]=compute_std_tree_2 ( A, dm_int, "_TMODE_upgma");
2317 fprintf (tl, "\n>Tree_%d Column\n", col1+1);
2318 print_tree (T1[ntree], "newick", tl);
2325 fprintf ( stderr, "\nERROR: No suitable pair of column supporting a tree [FATAL]\n", (A->name[a]));
2326 exit (EXIT_SUCCESS);
2329 score=treelist2avg_treecmp (T1, NULL);
2331 display_output_filename( stdout,"TreeList","newick",tot_pos_list, CHECK);
2333 if ((BT10=treelist2filtered_bootstrap (T1, NULL,score, 0.1)))
2335 vfclose (print_tree (BT10,"newick", vfopen (struc_tree10, "w")));
2336 display_output_filename( stdout,"Tree","newick",struc_tree10, CHECK);
2339 if ((BT50=treelist2filtered_bootstrap (T1, NULL, score,0.5)))
2341 vfclose (print_tree (BT50,"newick", vfopen (struc_tree50, "w")));
2342 display_output_filename( stdout,"Tree","newick",struc_tree50, CHECK);
2345 if ((BT100=treelist2filtered_bootstrap (T1, NULL,score, 1.0)))
2347 vfclose (print_tree (BT100,"newick", vfopen (struc_tree100, "w")));
2348 display_output_filename( stdout,"Tree","newick",struc_tree100, CHECK);
2355 B=copy_aln (A, NULL);
2356 for (a=0; a<A->len_aln; a++)
2363 S=tree_cmp (POS[a], RBT);
2369 score=NO_COLOR_RESIDUE;
2372 for (b=0; b<B->nseq; b++)
2374 if ( is_gap (B->seq_al[b][a]) || score == NO_COLOR_RESIDUE)
2376 B->seq_al[b][a]=NO_COLOR_RESIDUE;
2380 B->seq_al[b][a]=S->uw/10;
2385 output_format_aln ("score_html", A,B,color_struc_tree);
2386 display_output_filename( stdout,"Colored MSA","score_html",color_struc_tree, CHECK);
2390 exit (EXIT_SUCCESS);
2394 float square_atom ( Atom *X)
2397 return X->x*X->x + X->y*X->y + X->z*X->z;
2399 Atom* reframe_atom ( Atom *X, Atom*Y, Atom *Z, Atom *IN, Atom *R)
2401 float new_x, new_y, new_z;
2403 if ( R==NULL)R=vcalloc ( 1, sizeof (Atom));
2406 new_x= X->x*IN->x + Y->x*IN->y +Z->x*IN->z;
2407 new_y= X->y*IN->x + Y->y*IN->y +Z->y*IN->z;
2408 new_z= X->z*IN->x + Y->z*IN->y +Z->z*IN->z;
2416 Atom* add_atom ( Atom *A, Atom*B, Atom *R)
2418 if ( R==NULL)R=vcalloc ( 1, sizeof (Atom));
2426 Atom* diff_atom ( Atom *A, Atom*B, Atom *R)
2428 if ( R==NULL)R=vcalloc ( 1, sizeof (Atom));
2437 Atom * copy_atom ( Atom *A, Atom*R)
2439 if ( R==NULL)R=vcalloc ( 1, sizeof (Atom));
2441 R->res_num=A->res_num;
2446 sprintf( R->type, "%s", A->type);
2449 void print_atom (Atom *A)
2451 fprintf ( stdout, "%.2f %.2f %.2f", A->x, A->y, A->z);
2453 /************************************************************************/
2457 /************************************************************************/
2459 /*---------prototypes ----------*/
2460 static void computeBasePairMatrix(int**M,char*S,int l, int T);
2461 static int backtrack(int a,int b,int**M,char*S,char*P, int T);
2465 static int basePair(char x, char y)
2472 int a, b, c1, c2, lc1, lc2;
2473 mat=declare_short (256, 256);
2474 sprintf ( alp, "AGCTUagctu");
2475 for (a=0; a<strlen (alp); a++)
2477 for (b=a; b<strlen (alp)-1; b++)
2479 c1=alp[a];c2=alp[b];
2480 lc1=tolower(c1); lc2=tolower(c2);
2481 if ( lc1=='g' && lc2=='c')
2483 else if ( lc1=='a' && lc2=='u')
2485 else if ( lc1=='u' && lc2=='g')
2487 mat[c2][c1]=mat[c1][c2];
2491 return (int)mat[(int)x][(int)y];
2496 /* ------------------------------------------------------------ */
2498 char *nussinov(char *S, int THRESHOLD)
2503 /*-------------------------------
2505 paren is parenthesis expression for
2506 optimal RNA secondary structure
2507 THRESHOLD: Min distance between two paired residues
2508 -------------------------------*/
2513 /*----- initialization --*/
2515 paren=vcalloc (n+1, sizeof (char));
2516 numBasePairs=declare_int (n,n);
2518 for (i=0;i<n;i++) paren[i]='.';
2519 paren[n]='\0'; // paren is string of same length as S
2520 computeBasePairMatrix(numBasePairs,S,n, THRESHOLD);
2521 backtrack(0,n-1,numBasePairs,S,paren, THRESHOLD);
2522 free_int (numBasePairs, -1);
2526 static void computeBasePairMatrix(int** numBasePairs,char *S,int n, int THRESHOLD)
2528 int i,j,d,k,max,val,index;
2530 for (d = THRESHOLD; d < n; d++){
2531 for(i=0; i < n; i++)
2537 /*-------------------------------------
2538 if index<n at end of for-loop, then this
2539 means that index and j form a base pair,
2540 and this is noted by numBasePairs[j][i]=index.
2541 if index=n at end of for-loop, then this
2542 means that j is not base paired.
2543 -------------------------------------*/
2545 if ( numBasePairs[i][j-1]>max ){
2546 max = numBasePairs[i][j-1];
2548 // j not basepaired with some k such that i<k<j
2551 val = basePair(S[i],S[j]) + numBasePairs[i+1][j-1];
2552 if ( j-i<= THRESHOLD && val > max ){
2556 for(k=i; k<=j-THRESHOLD; k++){
2557 val = basePair(S[k],S[j]) + numBasePairs[i][k-1]
2558 + numBasePairs[k+1][j-1];
2564 numBasePairs[i][j]=max;
2566 numBasePairs[j][i]=index;
2568 numBasePairs[j][i]=-1;
2578 static int backtrack(int i, int j, int **numBasePairs,char *S, char *paren, int THRESHOLD)
2582 k = numBasePairs[j][i];
2587 if( THRESHOLD <= (j-1)-(k+1) )
2588 backtrack(k+1,j-1,numBasePairs,S,paren, THRESHOLD);
2589 if (THRESHOLD <= k-1-i )
2590 backtrack(i,k-1,numBasePairs,S,paren, THRESHOLD);
2593 if( THRESHOLD <= j-1-i )
2595 backtrack(i,j-1,numBasePairs,S,paren, THRESHOLD);
2603 char * rna_struc2rna_lib ( char *seq_name, char *seq, char *name)
2609 st=nussinov (seq, 2);
2610 if ( name==NULL)name=vtmpnam(NULL);
2611 fp=vfopen ( name, "w");
2612 fprintf (fp, "! TC_LIB_FORMAT_01\n");
2613 fprintf (fp, "1\n%s %d %s\n", seq_name, strlen (seq), seq);
2614 fprintf (fp, "#1 1\n");
2615 display_rna_ss (0, seq, st, fp);
2616 fprintf ( fp, "! SEQ_1_TO_N\n");
2619 //printf_system ( "cp %s test", name);
2622 int display_rna_ss ( int n, char *seq, char *st, FILE *fp)
2628 while ((p=st[n])!='\0')
2633 sprintf (string, "%d",n+1);
2634 n=display_rna_ss (n+1, seq, st, fp);
2635 fprintf (fp, "%s %d 100\n", string, n+1);
2646 /*********************************COPYRIGHT NOTICE**********************************/
2647 /*© Centro de Regulacio Genomica */
2649 /*Cedric Notredame */
2650 /*Tue Oct 27 10:12:26 WEST 2009. */
2651 /*All rights reserved.*/
2652 /*This file is part of T-COFFEE.*/
2654 /* T-COFFEE is free software; you can redistribute it and/or modify*/
2655 /* it under the terms of the GNU General Public License as published by*/
2656 /* the Free Software Foundation; either version 2 of the License, or*/
2657 /* (at your option) any later version.*/
2659 /* T-COFFEE is distributed in the hope that it will be useful,*/
2660 /* but WITHOUT ANY WARRANTY; without even the implied warranty of*/
2661 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/
2662 /* GNU General Public License for more details.*/
2664 /* You should have received a copy of the GNU General Public License*/
2665 /* along with Foobar; if not, write to the Free Software*/
2666 /* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/
2667 /*............................................... |*/
2668 /* If you need some more information*/
2669 /* cedric.notredame@europe.com*/
2670 /*............................................... |*/
2674 /*********************************COPYRIGHT NOTICE**********************************/