Next version of JABA
[jabaws.git] / binaries / src / tcoffee / t_coffee_source / evaluate_for_struc.c
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <math.h>
4 #include <stdarg.h>
5 #include <ctype.h>
6 #include <string.h>
7
8 #include "io_lib_header.h"
9 #include "util_lib_header.h"
10 #include "define_header.h"
11 #include "dp_lib_header.h"
12 void print_atom ( Atom*A);
13
14 float **** quantile_apdb_filtration ( Alignment *A, float ****residues, Constraint_list *CL,Pdb_param *PP, FILE *fp);
15 float **** irmsdmin_apdb_filtration ( Alignment *A, float ****residues, Constraint_list *CL,Pdb_param *PP, FILE *fp);
16 int apdb ( int argc, char *argv[])
17     {
18     
19         Constraint_list *CL=NULL;
20         Sequence  *S=NULL;
21         Alignment *A=NULL;
22         Alignment *EA=NULL;
23         Pdb_param *pdb_param;
24         
25         Fname *F=NULL;
26         char *file_name;
27         int a,c;
28
29         int n_pdb;
30         
31 /*PARAMETERS VARIABLES*/
32         int garbage;
33         char *parameters;
34         FILE *fp_parameters;
35
36         int quiet;
37         char *se_name;
38         FILE *le=NULL;
39         
40         char **list_file;
41         int n_list;
42         char **struc_to_use;
43         int n_struc_to_use;
44
45         char *aln;
46         char *repeat_seq;
47         char *repeat_pdb;
48         
49         char *color_mode;
50         char *comparison_io;
51
52         int  n_excluded_nb;
53
54         float maximum_distance;
55         float similarity_threshold;
56         float   md_threshold;
57
58
59         int print_rapdb;
60         
61         char  *outfile;
62         char  *run_name;
63         
64         char  *apdb_outfile;
65         char *cache;
66
67         char **out_aln_format;
68         int    n_out_aln_format;
69         
70         char *output_res_num;
71         char *local_mode;
72         float filter;
73         int filter_aln;
74         int irmsd_graph;
75         int nirmsd_graph;
76         int n_template_file;
77         char **template_file_list;
78         char *mode; 
79                 int prot_min_sim;
80         int prot_max_sim;
81         int prot_min_cov;
82         int pdb_min_sim;
83         int pdb_max_sim;
84         int pdb_min_cov;
85         
86
87         
88         char *prot_blast_server;
89         char *pdb_blast_server;
90         
91         
92         char *pdb_db;
93         char *prot_db;
94
95
96         argv=standard_initialisation (argv, &argc);
97         
98 /*PARAMETER PROTOTYPE:    READ PARAMETER FILE     */
99                declare_name (parameters);
100                get_cl_param(\
101                             /*argc*/      argc             ,\
102                             /*argv*/      argv             ,\
103                             /*output*/    &le              ,\
104                             /*Name*/      "-parameters"        ,\
105                             /*Flag*/      &garbage        ,\
106                             /*TYPE*/      "R_F"              ,\
107                             /*OPTIONAL?*/ OPTIONAL         ,\
108                             /*MAX Nval*/  1                ,\
109                             /*DOC*/       "Read the files in the parameter file" ,\
110                             /*Parameter*/ &parameters          ,\
111                             /*Def 1*/     "NULL"             ,\
112                             /*Def 2*/     "stdin"             ,\
113                             /*Min_value*/ "any"            ,\
114                             /*Max Value*/ "any"             \
115                    );             
116                if ( parameters && parameters[0])
117                   {
118                   argv[argc]=vcalloc ( VERY_LONG_STRING, sizeof(char));
119                   a=0;
120                   fp_parameters=vfopen (parameters, "r");
121                   while ((c=fgetc (fp_parameters))!=EOF)argv[1][a++]=c;
122                   vfclose (fp_parameters);
123                   argv[argc][a]='\0';
124                   argc++;
125                   argv=break_list ( argv, &argc, "=:;, \n");    
126                   }
127 /*PARAMETER PROTOTYPE*/
128      declare_name (se_name);
129      get_cl_param(\
130                             /*argc*/      argc          ,\
131                             /*argv*/      argv          ,\
132                             /*output*/    &le           ,\
133                             /*Name*/      "-quiet"      ,\
134                             /*Flag*/      &quiet        ,\
135                             /*TYPE*/      "W_F"         ,\
136                             /*OPTIONAL?*/ OPTIONAL      ,\
137                             /*MAX Nval*/  1             ,\
138                             /*DOC*/       "ND"          ,\
139                             /*Parameter*/ &se_name      ,\
140                             /*Def 1*/     "stderr"   ,\
141                             /*Def 2*/     "/dev/null"   ,\
142                             /*Min_value*/ "any"         ,\
143                             /*Max Value*/ "any"          \
144                    );
145
146      le=vfopen ( se_name, "w");
147      fprintf ( le, "\nPROGRAM: %s\n",argv[0]);
148
149 /*PARAMETER PROTOTYPE:        IN */
150         list_file=declare_char ( 200, STRING);
151         n_list=get_cl_param(\
152                             /*argc*/      argc          ,\
153                             /*argv*/      argv          ,\
154                             /*output*/    &le           ,\
155                             /*Name*/      "-in"         ,\
156                             /*Flag*/      &garbage      ,\
157                             /*TYPE*/      "S"           ,\
158                             /*OPTIONAL?*/ OPTIONAL      ,\
159                             /*MAX Nval*/  200           ,\
160                             /*DOC*/       "ND"          ,\
161                             /*Parameter*/ list_file     ,\
162                             /*Def 1*/    "",\
163                             /*Def 2*/     "stdin"       ,\
164                             /*Min_value*/ "any"         ,\
165                             /*Max Value*/ "any"          \
166                    );
167 /*PARAMETER PROTOTYPE:        IN */
168         struc_to_use=declare_char ( 200, STRING);
169         n_struc_to_use=get_cl_param(\
170                             /*argc*/      argc          ,\
171                             /*argv*/      argv          ,\
172                             /*output*/    &le           ,\
173                             /*Name*/      "-struc_to_use"         ,\
174                             /*Flag*/      &garbage      ,\
175                             /*TYPE*/      "S"           ,\
176                             /*OPTIONAL?*/ OPTIONAL      ,\
177                             /*MAX Nval*/  200           ,\
178                             /*DOC*/       "ND"          ,\
179                             /*Parameter*/ struc_to_use     ,\
180                             /*Def 1*/    "",\
181                             /*Def 2*/     "stdin"       ,\
182                             /*Min_value*/ "any"         ,\
183                             /*Max Value*/ "any"          \
184                    );
185
186 /*PARAMETER PROTOTYPE:        COMPARISON IO */
187         declare_name (comparison_io);
188         get_cl_param(\
189                             /*argc*/      argc          ,\
190                             /*argv*/      argv          ,\
191                             /*output*/    &le           ,\
192                             /*Name*/      "-io_format"  ,\
193                             /*Flag*/      &garbage      ,\
194                             /*TYPE*/      "S"           ,\
195                             /*OPTIONAL?*/ OPTIONAL      ,\
196                             /*MAX Nval*/  200           ,\
197                             /*DOC*/       "ND"          ,\
198                             /*Parameter*/  &comparison_io,\
199                             /*Def 1*/    "hsgd0123456",\
200                             /*Def 2*/     ""       ,\
201                             /*Min_value*/ "any"         ,\
202                             /*Max Value*/ "any"          \
203                    );
204 /*PARAMETER PROTOTYPE:        ALN */
205         declare_name (aln);
206         get_cl_param(\
207                             /*argc*/      argc          ,\
208                             /*argv*/      argv          ,\
209                             /*output*/    &le           ,\
210                             /*Name*/      "-aln"         ,\
211                             /*Flag*/      &garbage      ,\
212                             /*TYPE*/      "S"           ,\
213                             /*OPTIONAL?*/ OPTIONAL      ,\
214                             /*MAX Nval*/  1           ,\
215                             /*DOC*/       "ND"          ,\
216                             /*Parameter*/  &aln,\
217                             /*Def 1*/    "",\
218                             /*Def 2*/     "stdin"       ,\
219                             /*Min_value*/ "any"         ,\
220                             /*Max Value*/ "any"          \
221                    );
222 /*PARAMETER PROTOTYPE:        ALN */
223         
224         declare_name (repeat_seq);
225         get_cl_param(\
226                             /*argc*/      argc          ,\
227                             /*argv*/      argv          ,\
228                             /*output*/    &le           ,\
229                             /*Name*/      "-repeat_seq"         ,\
230                             /*Flag*/      &garbage      ,\
231                             /*TYPE*/      "S"           ,\
232                             /*OPTIONAL?*/ OPTIONAL      ,\
233                             /*MAX Nval*/  1           ,\
234                             /*DOC*/       "ND"          ,\
235                             /*Parameter*/  &repeat_seq,\
236                             /*Def 1*/    "",\
237                             /*Def 2*/     "stdin"       ,\
238                             /*Min_value*/ "any"         ,\
239                             /*Max Value*/ "any"          \
240                    );
241
242 /*PARAMETER PROTOTYPE:        ALN */
243         declare_name (repeat_pdb);
244         get_cl_param(\
245                             /*argc*/      argc          ,\
246                             /*argv*/      argv          ,\
247                             /*output*/    &le           ,\
248                             /*Name*/      "-repeat_pdb"         ,\
249                             /*Flag*/      &garbage      ,\
250                             /*TYPE*/      "S"           ,\
251                             /*OPTIONAL?*/ OPTIONAL      ,\
252                             /*MAX Nval*/  1           ,\
253                             /*DOC*/       "ND"          ,\
254                             /*Parameter*/  &repeat_pdb,\
255                             /*Def 1*/    "",\
256                             /*Def 2*/     "stdin"       ,\
257                             /*Min_value*/ "any"         ,\
258                             /*Max Value*/ "any"          \
259                    );
260
261 /*PARAMETER PROTOTYPE:    Nb to exclude     */
262                get_cl_param(\
263                             /*argc*/      argc          ,\
264                             /*argv*/      argv          ,\
265                             /*output*/    &le           ,\
266                             /*Name*/      "-n_excluded_nb"    ,\
267                             /*Flag*/      &garbage      ,\
268                             /*TYPE*/      "D"           ,\
269                             /*OPTIONAL?*/ OPTIONAL      ,\
270                             /*MAX Nval*/  1             ,\
271                             /*DOC*/       "Exclude the N Nb on each side of the central residue. -1 triggers an automatic setting equal to the window size corresponding to the sphere"          ,\
272                             /*Parameter*/ &n_excluded_nb          ,\
273                             /*Def 1*/    "-1"         ,\
274                             /*Def 2*/    "1"             ,\
275                             /*Min_value*/ "any"         ,\
276                             /*Max Value*/ "any"          \
277                    );
278 /*PARAMETER PROTOTYPE:   diatances to count    */
279                get_cl_param(\
280                             /*argc*/      argc          ,\
281                             /*argv*/      argv          ,\
282                             /*output*/    &le           ,\
283                             /*Name*/      "-similarity_threshold"    ,\
284                             /*Flag*/      &garbage       ,\
285                             /*TYPE*/      "F"            ,\
286                             /*OPTIONAL?*/ OPTIONAL       ,\
287                             /*MAX Nval*/  1              ,\
288                             /*DOC*/       "ND"           ,\
289                             /*Parameter*/ &similarity_threshold,\
290                             /*Def 1*/    "70"             ,\
291                             /*Def 2*/    "70"             ,\
292                             /*Min_value*/ "any"          ,\
293                             /*Max Value*/ "any"           \
294                    );
295 /*PARAMETER PROTOTYPE:   diatances to count    */
296                get_cl_param(\
297                             /*argc*/      argc          ,\
298                             /*argv*/      argv          ,\
299                             /*output*/    &le           ,\
300                             /*Name*/      "-filter"    ,\
301                             /*Flag*/      &garbage       ,\
302                             /*TYPE*/      "F"            ,\
303                             /*OPTIONAL?*/ OPTIONAL       ,\
304                             /*MAX Nval*/  1              ,\
305                             /*DOC*/       "Filter by only keeping the best quantile"           ,\
306                             /*Parameter*/ &filter,\
307                             /*Def 1*/    "1.00"             ,\
308                             /*Def 2*/    "1.00"             ,\
309                             /*Min_value*/ "-1.00"          ,\
310                             /*Max Value*/ "1.00"           \
311                    );
312 /*PARAMETER PROTOTYPE:   diatances to count    */
313                get_cl_param(\
314                             /*argc*/      argc          ,\
315                             /*argv*/      argv          ,\
316                             /*output*/    &le           ,\
317                             /*Name*/      "-filter_aln"    ,\
318                             /*Flag*/      &garbage       ,\
319                             /*TYPE*/      "D"            ,\
320                             /*OPTIONAL?*/ OPTIONAL       ,\
321                             /*MAX Nval*/  1              ,\
322                             /*DOC*/       "Lower Case For Residues Filtered Out"           ,\
323                             /*Parameter*/ &filter_aln,\
324                             /*Def 1*/    "0"             ,\
325                             /*Def 2*/    "1"             ,\
326                             /*Min_value*/ "0"          ,\
327                             /*Max Value*/ "1"           \
328                    );
329 /*PARAMETER PROTOTYPE:   diatances to count    */
330                get_cl_param(\
331                             /*argc*/      argc          ,\
332                             /*argv*/      argv          ,\
333                             /*output*/    &le           ,\
334                             /*Name*/      "-irmsd_graph"    ,\
335                             /*Flag*/      &garbage       ,\
336                             /*TYPE*/      "D"            ,\
337                             /*OPTIONAL?*/ OPTIONAL       ,\
338                             /*MAX Nval*/  1              ,\
339                             /*DOC*/       "Outputs the irmsd, position/position"           ,\
340                             /*Parameter*/ &irmsd_graph,\
341                             /*Def 1*/    "0"             ,\
342                             /*Def 2*/    "1"             ,\
343                             /*Min_value*/ "0"          ,\
344                             /*Max Value*/ "1"           \
345                    );
346 /*PARAMETER PROTOTYPE:   diatances to count    */
347                get_cl_param(\
348                             /*argc*/      argc          ,\
349                             /*argv*/      argv          ,\
350                             /*output*/    &le           ,\
351                             /*Name*/      "-nirmsd_graph"    ,\
352                             /*Flag*/      &garbage       ,\
353                             /*TYPE*/      "D"            ,\
354                             /*OPTIONAL?*/ OPTIONAL       ,\
355                             /*MAX Nval*/  1              ,\
356                             /*DOC*/       "Outputs the NIRMSD VS N Removed Residues Curve"           ,\
357                             /*Parameter*/ &nirmsd_graph,\
358                             /*Def 1*/    "0"             ,\
359                             /*Def 2*/    "1"             ,\
360                             /*Min_value*/ "0"          ,\
361                             /*Max Value*/ "1"           \
362                    );
363 /*PARAMETER PROTOTYPE:    -rmsd_threshold     */
364                get_cl_param(\
365                             /*argc*/      argc          ,\
366                             /*argv*/      argv          ,\
367                             /*output*/    &le           ,\
368                             /*Name*/      "-md_threshold"     ,\
369                             /*Flag*/      &garbage      ,\
370                             /*TYPE*/      "F"           ,\
371                             /*OPTIONAL?*/ OPTIONAL      ,\
372                             /*MAX Nval*/  1             ,\
373                             /*DOC*/       "ND"          ,\
374                             /*Parameter*/ &md_threshold ,\
375                             /*Def 1*/    "1"            ,\
376                             /*Def 2*/    "1"             ,\
377                             /*Min_value*/ "any"         ,\
378                             /*Max Value*/ "any"         \
379                    );
380
381 /*PARAMETER PROTOTYPE:    -maximum distances     */
382                get_cl_param(\
383                             /*argc*/      argc          ,\
384                             /*argv*/      argv          ,\
385                             /*output*/    &le           ,\
386                             /*Name*/      "-maximum_distance"     ,\
387                             /*Flag*/      &garbage      ,\
388                             /*TYPE*/      "F"           ,\
389                             /*OPTIONAL?*/ OPTIONAL      ,\
390                             /*MAX Nval*/  1             ,\
391                             /*DOC*/       "ND"          ,\
392                             /*Parameter*/ &maximum_distance          ,\
393                             /*Def 1*/    "10"            ,\
394                             /*Def 2*/    "10"             ,\
395                             /*Min_value*/ "any"         ,\
396                             /*Max Value*/ "any"         \
397                    );
398
399
400 /*PARAMETER PROTOTYPE:    -print_rapdb     */
401                get_cl_param(\
402                             /*argc*/      argc          ,\
403                             /*argv*/      argv          ,\
404                             /*output*/    &le           ,\
405                             /*Name*/      "-print_rapdb"     ,\
406                             /*Flag*/      &garbage      ,\
407                             /*TYPE*/      "D"           ,\
408                             /*OPTIONAL?*/ OPTIONAL      ,\
409                             /*MAX Nval*/  1             ,\
410                             /*DOC*/       "Prints the neighborhood of each pair of aligned residues, along with the associated local score"          ,\
411                             /*Parameter*/ &print_rapdb          ,\
412                             /*Def 1*/    "0"            ,\
413                             /*Def 2*/    "1"             ,\
414                             /*Min_value*/ "any"         ,\
415                             /*Max Value*/ "any"         \
416                    );          
417
418 /*PARAMETER PROTOTYPE:    RUN_NAME     */
419                declare_name (run_name);
420                get_cl_param(\
421                             /*argc*/      argc          ,\
422                             /*argv*/      argv          ,\
423                             /*output*/    &le           ,\
424                             /*Name*/      "-run_name"    ,\
425                             /*Flag*/      &garbage      ,\
426                             /*TYPE*/      "W_F"         ,\
427                             /*OPTIONAL?*/ OPTIONAL      ,\
428                             /*MAX Nval*/  1             ,\
429                             /*DOC*/       "ND"          ,\
430                             /*Parameter*/ &run_name      ,\
431                             /*Def 1*/    "default"      ,\
432                             /*Def 2*/    ""             ,\
433                             /*Min_value*/ "default"         ,\
434                             /*Max Value*/ "any"          \
435                    );
436 /*PARAMETER PROTOTYPE:    OUTFILE     */
437 /*PARAMETER PROTOTYPE:    OUTFILE     */
438                declare_name ( outfile);
439                get_cl_param(\
440                             /*argc*/      argc          ,\
441                             /*argv*/      argv          ,\
442                             /*output*/    &le           ,\
443                             /*Name*/      "-outfile"    ,\
444                             /*Flag*/      &garbage      ,\
445                             /*TYPE*/      "W_F"         ,\
446                             /*OPTIONAL?*/ OPTIONAL      ,\
447                             /*MAX Nval*/  1             ,\
448                             /*DOC*/       "ND"          ,\
449                             /*Parameter*/ &outfile      ,\
450                             /*Def 1*/    "no"      ,\
451                             /*Def 2*/     "default"             ,\
452                             /*Min_value*/ "default"         ,\
453                             /*Max Value*/ "any"          \
454                    );
455 /*PARAMETER PROTOTYPE:    OUTFILE     */
456                declare_name ( apdb_outfile);
457                get_cl_param(\
458                             /*argc*/      argc          ,\
459                             /*argv*/      argv          ,\
460                             /*output*/    &le           ,\
461                             /*Name*/      "-apdb_outfile"    ,\
462                             /*Flag*/      &garbage      ,\
463                             /*TYPE*/      "W_F"         ,\
464                             /*OPTIONAL?*/ OPTIONAL      ,\
465                             /*MAX Nval*/  1             ,\
466                             /*DOC*/       "ND"          ,\
467                             /*Parameter*/ &apdb_outfile      ,\
468                             /*Def 1*/    "stdout"      ,\
469                             /*Def 2*/    "default"             ,\
470                             /*Min_value*/ "any"         ,\
471                             /*Max Value*/ "any"          \
472                    );
473
474 /*PARAMETER PROTOTYPE:    OUTPUT_FORMAT    */
475                out_aln_format=declare_char ( 200, STRING);
476                n_out_aln_format=get_cl_param(\
477                             /*argc*/      argc           ,\
478                             /*argv*/      argv           ,\
479                             /*output*/    &le            ,\
480                             /*Name*/      "-output"      ,\
481                             /*Flag*/      &garbage       ,\
482                             /*TYPE*/      "S"            ,\
483                             /*OPTIONAL?*/ OPTIONAL       ,\
484                             /*MAX Nval*/  200              ,\
485                             /*DOC*/       "ND"           ,\
486                             /*Parameter*/ out_aln_format,\
487                             /*Def 1*/    "score_html"           ,\
488                             /*Def 2*/    ""             ,\
489                             /*Min_value*/ "any"          ,\
490                             /*Max Value*/ "any"           \
491                    );
492
493
494
495 /*PARAMETER PROTOTYPE:    INFILE    */
496                declare_name (color_mode);
497                get_cl_param(\
498                             /*argc*/      argc           ,\
499                             /*argv*/      argv           ,\
500                             /*output*/    &le            ,\
501                             /*Name*/      "-color_mode"        ,\
502                             /*Flag*/      &garbage       ,\
503                             /*TYPE*/      "S"            ,\
504                             /*OPTIONAL?*/ OPTIONAL       ,\
505                             /*MAX Nval*/  1              ,\
506                             /*DOC*/       "ND"           ,\
507                             /*Parameter*/ &color_mode ,\
508                             /*Def 1*/    "apdb"            ,\
509                             /*Def 2*/    "irmsd"             ,\
510                             /*Min_value*/ "any"           ,\
511                             /*Max Value*/ "any"            \
512                    );
513 /*PARAMETER PROTOTYPE:    INFILE    */
514                declare_name (output_res_num);
515                get_cl_param(\
516                             /*argc*/      argc           ,\
517                             /*argv*/      argv           ,\
518                             /*output*/    &le            ,\
519                             /*Name*/      "-seqnos"        ,\
520                             /*Flag*/      &garbage       ,\
521                             /*TYPE*/      "S"            ,\
522                             /*OPTIONAL?*/ OPTIONAL       ,\
523                             /*MAX Nval*/  1              ,\
524                             /*DOC*/       "ND"           ,\
525                             /*Parameter*/ &output_res_num ,\
526                             /*Def 1*/    "off"            ,\
527                             /*Def 2*/    "on"             ,\
528                             /*Min_value*/ "any"           ,\
529                             /*Max Value*/ "any"            \
530                    );
531                declare_name (cache);
532                get_cl_param(\
533                             /*argc*/      argc          ,\
534                             /*argv*/      argv          ,\
535                             /*output*/    &le           ,\
536                             /*Name*/      "-cache"    ,\
537                             /*Flag*/      &garbage      ,\
538                             /*TYPE*/      "W_F"         ,\
539                             /*OPTIONAL?*/ OPTIONAL      ,\
540                             /*MAX Nval*/  1             ,\
541                             /*DOC*/       "use,ignore,update,local, directory name"          ,\
542                             /*Parameter*/ &cache       ,\
543                             /*Def 1*/    "use"      ,\
544                             /*Def 2*/    "update"      ,\
545                             /*Min_value*/ "any"         ,\
546                             /*Max Value*/ "any"          \
547                    );
548              
549                 declare_name (local_mode);
550                 get_cl_param(                                   \
551                             /*argc*/      argc          ,\
552                             /*argv*/      argv          ,\
553                             /*output*/    &le           ,\
554                             /*Name*/      "-local_mode"    ,\
555                             /*Flag*/      &garbage      ,\
556                             /*TYPE*/      "W_F"         ,\
557                             /*OPTIONAL?*/ OPTIONAL      ,\
558                             /*MAX Nval*/  1             ,\
559                             /*DOC*/       "Mode for choosing the Neighborhood (bubble or window)\nWhen selecting window, maximum distance becomes the window 1/2 size, in residues\nWhen using sphere, maximum_distance is the sphere radius in Angstrom"          ,\
560                             /*Parameter*/ &local_mode       ,\
561                             /*Def 1*/    "sphere"      ,\
562                             /*Def 2*/    "window"      ,\
563                             /*Min_value*/ "any"         ,\
564                             /*Max Value*/ "any"          \
565                                           );
566                 
567 /*PARAMETER PROTOTYPE:        IN */
568                 template_file_list=declare_char (100, STRING);
569                 n_template_file=get_cl_param(                   \
570                             /*argc*/      argc          , \
571                             /*argv*/      argv          , \
572                             /*output*/    &le           ,\
573                             /*Name*/      "-template_file"         ,\
574                             /*Flag*/      &garbage      ,\
575                             /*TYPE*/      "S"           ,\
576                             /*OPTIONAL?*/ OPTIONAL      ,\
577                             /*MAX Nval*/  1000           ,\
578                             /*DOC*/       "List of templates file for the sequences",\
579                             /*Parameter*/ template_file_list     ,      \
580                             /*Def 1*/    "_SELF_P_",\
581                             /*Def 2*/     "stdin"       ,\
582                             /*Min_value*/ "any"         ,\
583                             /*Max Value*/ "any"          \
584                                           );
585                 /*PARAMETER PROTOTYPE:        MODE */
586                 declare_name (mode);
587                 get_cl_param(                             \
588                             /*argc*/      argc          , \
589                             /*argv*/      argv          , \
590                             /*output*/    &le           ,\
591                             /*Name*/      "-mode"         ,\
592                             /*Flag*/      &garbage      ,\
593                             /*TYPE*/      "S"           ,\
594                             /*OPTIONAL?*/ OPTIONAL      ,\
595                             /*MAX Nval*/  1           ,\
596                             /*DOC*/       "Mode: irmsd, ",\
597                             /*Parameter*/ &mode    ,    \
598                             /*Def 1*/    "irmsd",\
599                             /*Def 2*/     "stdin"       ,\
600                             /*Min_value*/ "any"         ,\
601                             /*Max Value*/ "any"          \
602                                           );
603
604         
605                 
606         get_cl_param(\
607                             /*argc*/      argc             ,\
608                             /*argv*/      argv             ,\
609                             /*output*/    &le              ,\
610                             /*Name*/      "-prot_min_sim"        ,\
611                             /*Flag*/      &prot_min_sim        ,\
612                             /*TYPE*/      "D"              ,\
613                             /*OPTIONAL?*/ OPTIONAL         ,\
614                             /*MAX Nval*/  1                ,\
615                             /*DOC*/       "Minimum similarity between a sequence and its PDB target" ,\
616                             /*Parameter*/ &prot_min_sim          ,\
617                             /*Def 1*/     "0"             ,\
618                             /*Def 2*/     "20"             ,\
619                             /*Min_value*/ "any"            ,\
620                             /*Max Value*/ "any"             \
621                    );
622  set_int_variable ("prot_min_sim", prot_min_sim);
623
624 get_cl_param(\
625                             /*argc*/      argc             ,\
626                             /*argv*/      argv             ,\
627                             /*output*/    &le              ,\
628                             /*Name*/      "-prot_max_sim"        ,\
629                             /*Flag*/      &prot_max_sim        ,\
630                             /*TYPE*/      "D"              ,\
631                             /*OPTIONAL?*/ OPTIONAL         ,\
632                             /*MAX Nval*/  1                ,\
633                             /*DOC*/       "Maximum similarity between a sequence and its BLAST relatives" ,\
634                             /*Parameter*/ &prot_max_sim          ,\
635                             /*Def 1*/     "90"             ,\
636                             /*Def 2*/     "100"             ,\
637                             /*Min_value*/ "any"            ,\
638                             /*Max Value*/ "any"             \
639                    );
640  set_int_variable ("prot_max_sim", prot_max_sim);
641  
642 get_cl_param(\
643                             /*argc*/      argc             ,\
644                             /*argv*/      argv             ,\
645                             /*output*/    &le              ,\
646                             /*Name*/      "-prot_min_cov"        ,\
647                             /*Flag*/      &prot_min_cov        ,\
648                             /*TYPE*/      "D"              ,\
649                             /*OPTIONAL?*/ OPTIONAL         ,\
650                             /*MAX Nval*/  1                ,\
651                             /*DOC*/       "Minimum coverage of a sequence by its BLAST relatives" ,\
652                             /*Parameter*/ &prot_min_cov          ,\
653                             /*Def 1*/     "0"             ,\
654                             /*Def 2*/     "0"             ,\
655                             /*Min_value*/ "any"            ,\
656                             /*Max Value*/ "any"             \
657                    );
658 set_int_variable ("prot_min_cov", prot_min_cov);
659
660 get_cl_param(\
661                             /*argc*/      argc             ,\
662                             /*argv*/      argv             ,\
663                             /*output*/    &le              ,\
664                             /*Name*/      "-pdb_min_sim"        ,\
665                             /*Flag*/      &pdb_min_sim        ,\
666                             /*TYPE*/      "D"              ,\
667                             /*OPTIONAL?*/ OPTIONAL         ,\
668                             /*MAX Nval*/  1                ,\
669                             /*DOC*/       "Minimum similarity between a sequence and its PDB target" ,\
670                             /*Parameter*/ &pdb_min_sim          ,\
671                             /*Def 1*/     "35"             ,\
672                             /*Def 2*/     "35"             ,\
673                             /*Min_value*/ "any"            ,\
674                             /*Max Value*/ "any"             \
675                    );
676
677  set_int_variable ("pdb_min_sim", pdb_min_sim);
678  get_cl_param(                                                  \
679                             /*argc*/      argc             ,\
680                             /*argv*/      argv             ,\
681                             /*output*/    &le              ,\
682                             /*Name*/      "-pdb_max_sim"        ,\
683                             /*Flag*/      &pdb_max_sim        ,\
684                             /*TYPE*/      "D"              ,\
685                             /*OPTIONAL?*/ OPTIONAL         ,\
686                             /*MAX Nval*/  1                ,\
687                             /*DOC*/       "Maximum similarity between a sequence and its PDB target" ,\
688                             /*Parameter*/ &pdb_max_sim          ,\
689                             /*Def 1*/     "100"             ,\
690                             /*Def 2*/     "0"             ,\
691                             /*Min_value*/ "any"            ,\
692                             /*Max Value*/ "any"             \
693                    );
694  set_int_variable ("pdb_max_sim", pdb_max_sim);
695  get_cl_param(                                                  \
696                             /*argc*/      argc             ,\
697                             /*argv*/      argv             ,\
698                             /*output*/    &le              ,\
699                             /*Name*/      "-pdb_min_cov"        ,\
700                             /*Flag*/      &pdb_min_cov        ,\
701                             /*TYPE*/      "D"              ,\
702                             /*OPTIONAL?*/ OPTIONAL         ,\
703                             /*MAX Nval*/  1                ,\
704                             /*DOC*/       "Minimum coverage of a sequence by its PDB target" ,\
705                             /*Parameter*/ &pdb_min_cov          ,\
706                             /*Def 1*/     "50"             ,\
707                             /*Def 2*/     "25"             ,\
708                             /*Min_value*/ "any"            ,\
709                             /*Max Value*/ "any"             \
710                    );
711 set_int_variable ("pdb_min_cov", pdb_min_cov);
712  
713
714  
715 declare_name (pdb_blast_server);
716  get_cl_param(\
717                             /*argc*/      argc          ,\
718                             /*argv*/      argv          ,\
719                             /*output*/    &le           ,\
720                             /*Name*/      "-pdb_blast_server"    ,\
721                             /*Flag*/      &garbage      ,\
722                             /*TYPE*/      "W_F"         ,\
723                             /*OPTIONAL?*/ OPTIONAL      ,\
724                             /*MAX Nval*/  1             ,\
725                             /*DOC*/       "ND"          ,\
726                             /*Parameter*/&pdb_blast_server       ,\
727                             /*Def 1*/    "EBI"      ,\
728                             /*Def 2*/    "default"      ,\
729                             /*Min_value*/ "any"         ,\
730                             /*Max Value*/ "any"          \
731                    );
732 declare_name (prot_blast_server);
733  get_cl_param(\
734                             /*argc*/      argc          ,\
735                             /*argv*/      argv          ,\
736                             /*output*/    &le           ,\
737                             /*Name*/      "-blast"    ,\
738                             /*Flag*/      &garbage      ,\
739                             /*TYPE*/      "W_F"         ,\
740                             /*OPTIONAL?*/ OPTIONAL      ,\
741                             /*MAX Nval*/  1             ,\
742                             /*DOC*/       "ND"          ,\
743                             /*Parameter*/&prot_blast_server       ,\
744                             /*Def 1*/    ""      ,\
745                             /*Def 2*/    ""      ,\
746                             /*Min_value*/ "any"         ,\
747                             /*Max Value*/ "any"          \
748                    );
749  //make sure that -blast and -blast_server are both supported blast>blast_server 
750  if ( !prot_blast_server[0])
751    {
752      get_cl_param(                                              \
753                             /*argc*/      argc          ,\
754                             /*argv*/      argv          ,\
755                             /*output*/    &le           ,\
756                             /*Name*/      "-blast_server"    ,\
757                             /*Flag*/      &garbage      ,\
758                             /*TYPE*/      "W_F"         ,\
759                             /*OPTIONAL?*/ OPTIONAL      ,\
760                             /*MAX Nval*/  1             ,\
761                             /*DOC*/       "ND"          ,\
762                             /*Parameter*/&prot_blast_server       ,\
763                             /*Def 1*/    "EBI"      ,\
764                             /*Def 2*/    "default"      ,\
765                             /*Min_value*/ "any"         ,\
766                             /*Max Value*/ "any"          \
767                    );
768    }
769  set_string_variable ("blast_server", prot_blast_server);
770  
771
772  
773  declare_name (pdb_db);
774  get_cl_param(\
775                             /*argc*/      argc          ,\
776                             /*argv*/      argv          ,\
777                             /*output*/    &le           ,\
778                             /*Name*/      "-pdb_db"    ,\
779                             /*Flag*/      &garbage      ,\
780                             /*TYPE*/      "W_F"         ,\
781                             /*OPTIONAL?*/ OPTIONAL      ,\
782                             /*MAX Nval*/  1             ,\
783                             /*DOC*/       "Non Redundant PDB database"          ,\
784                             /*Parameter*/&pdb_db       ,\
785                             /*Def 1*/    "pdb"      ,\
786                             /*Def 2*/    "default"      ,\
787                             /*Min_value*/ "any"         ,\
788                             /*Max Value*/ "any"          \
789                    );
790  set_string_variable ("pdb_db", pdb_db);
791  
792
793 declare_name (prot_db);
794  get_cl_param(\
795                             /*argc*/      argc          ,\
796                             /*argv*/      argv          ,\
797                             /*output*/    &le           ,\
798                             /*Name*/      "-protein_db"    ,\
799                             /*Flag*/      &garbage      ,\
800                             /*TYPE*/      "W_F"         ,\
801                             /*OPTIONAL?*/ OPTIONAL      ,\
802                             /*MAX Nval*/  1             ,\
803                             /*DOC*/       "ND"          ,\
804                             /*Parameter*/&prot_db       ,\
805                             /*Def 1*/    "uniprot"      ,\
806                             /*Def 2*/    "default"      ,\
807                             /*Min_value*/ "any"         ,\
808                             /*Max Value*/ "any"          \
809                    );
810  // set the correct mode:
811  if ( strm (argv[0], "trmsd"))sprintf (mode, "trmsd");
812    
813  set_string_variable ("prot_db", prot_db);
814  
815
816                 if (argc==1){myexit (EXIT_SUCCESS);}
817                 
818                 if ( strm (outfile,"no"))n_out_aln_format=0;
819         
820                 get_cl_param( argc, argv,&le, NULL,NULL,NULL,0,0,NULL); 
821                 prepare_cache (cache);        
822                 
823                 
824                 if (strm ( aln, ""))
825                   sprintf ( aln, "%s", argv[1]);
826                 
827                 if (!is_aln (aln))
828                   {
829                     printf_exit (EXIT_FAILURE, stderr, "\n\n---- ERROR: File %s must be a valid alignment [FATAL:%s-%s]\n\n",aln,argv[0], PROGRAM);
830                   }
831                 
832                 pdb_param=vcalloc ( 1, sizeof(Pdb_param));
833                 
834                 pdb_param->similarity_threshold=similarity_threshold;
835                 
836                 pdb_param->md_threshold=md_threshold;
837                 pdb_param->maximum_distance=maximum_distance;
838                 
839                 if ( n_excluded_nb>0)
840                   pdb_param->n_excluded_nb=n_excluded_nb;
841                 else if ( n_excluded_nb==-1)
842                   pdb_param->n_excluded_nb=(int)((float)maximum_distance/(float)1.57);
843                 /* Exclude all the nb within the bubble at +1, +2, +n*/    
844                 pdb_param->print_rapdb=print_rapdb;
845                 pdb_param->comparison_io=comparison_io;
846                 
847                 pdb_param->local_mode=local_mode;
848                 pdb_param->color_mode=lower_string (color_mode);
849                 pdb_param->filter=filter;
850                 pdb_param->filter_aln=filter_aln;
851                 pdb_param->irmsd_graph=irmsd_graph;
852                 pdb_param->nirmsd_graph=nirmsd_graph;
853                 
854                 sprintf ( list_file[n_list++], "S%s", aln);
855                 
856         
857                 if (!strm (repeat_seq, ""))
858                   {
859                     
860                     sprintf ( template_file_list[0], "%s", process_repeat (list_file[0], repeat_seq, repeat_pdb));
861                     fprintf ( le, "\n##Turn a repeat List into a Template File\n");
862                     le=display_file_content (le,template_file_list[0]);
863                     fprintf ( le, "\n\n");
864                   }
865                 S=read_seq_in_n_list (list_file, n_list, NULL, NULL);
866                 
867                 le=display_sequences_names ( S,le,0, 0);
868         
869                 if ( n_template_file)
870                   {
871                     fprintf ( le, "\nLooking For Sequence Templates:\n");
872                     for ( a=0; a< n_template_file; a++)
873                       {
874                         fprintf ( le, "\n\tTemplate Type: [%s] Mode Or File: [%s] [Start", template_type2type_name(template_file_list[a]), template_file_list[a]);
875                         S=seq2template_seq(S, template_file_list[a], F);
876                         fprintf ( le, "]");
877                       }
878                   }
879         
880                 if ( !strm (run_name, "default"))
881                   {
882                     F=parse_fname(run_name);
883                     sprintf (F->name, "%s", F->full);
884                   }
885                 else 
886                   {
887                     F=parse_fname (aln);
888                   }
889
890                 for ( a=0; a< S->nseq; a++)
891                   {
892                     char *p;
893                     
894                     p=seq2T_value (S, a, "template_file", "_P_");
895                     
896                     if (p)sprintf (S->file[a], "%s",p);
897                   }
898                 
899                 CL=declare_constraint_list ( S,NULL, NULL, 0,NULL, NULL);
900                 CL->T=vcalloc (S->nseq,sizeof (Ca_trace*));
901                 
902
903                 for ( n_pdb=0,a=0; a<S->nseq; a++)
904                   {
905                     if ( !is_pdb_file ( S->file[a])){CL->T[a]=NULL;continue;}
906                     CL->T[a]=read_ca_trace    (S->file[a], "ATOM");
907                     CL->T[a]=trim_ca_trace    (CL->T[a], S->seq[a]);
908                     (CL->T[a])->pdb_param=pdb_param;
909                     n_pdb++;
910                   }
911
912                 A=declare_aln (S);
913         
914
915                 A->residue_case=KEEP_CASE;
916                 A=main_read_aln(aln, A);
917                 EA=copy_aln (A, EA);
918                 A->CL=CL;
919                 
920                 if ( strm (apdb_outfile, "default"))
921                   sprintf ( apdb_outfile, "%s.apdb_result", F->name);
922                 
923                 if ( n_pdb<2)
924                   {
925                     FILE *fp;
926                     fp=vfopen (apdb_outfile, "w");
927                     fprintf (fp, "\nYour Alignment Does Not Contain Enough Sequences With a known Structure\n");
928                     fprintf (fp, "To Use APDB, your alignment must include at least TWO sequences with a known structure.\n");
929                     fprintf (fp, "These sequences must be named according to their PDB identifier, followed by the chain index (if any) ex: 1fnkA\n");
930                     fprintf (fp, "[FATAL:%s]\n", PROGRAM);
931                     vfclose (fp);
932                   }
933                 else if ( strm (mode, "irmsd"))
934                   {
935                       EA=analyse_pdb ( A, EA, apdb_outfile);
936                   }
937                 else if ( strm (mode, "msa2tree") || strm (mode, "trmsd"))
938                   {
939                     EA=msa2struc_dist ( A, EA,F->name);
940                   }
941                 le=display_output_filename ( le, "APDB_RESULT", "APDB_RESULT_FORMAT_01", apdb_outfile, CHECK);
942                 
943                 if ( n_pdb>=2)
944                   {
945                     declare_name (file_name);
946                     for ( a=0; a< n_out_aln_format; a++)
947                       {
948                         if ( strm2( outfile, "stdout", "stderr"))sprintf (file_name, "%s", outfile);
949                         else if ( strm (outfile, "default"))
950                           sprintf (file_name, "%s.%s",F->name, out_aln_format[a]);
951                         else
952                           sprintf (file_name, "%s.%s",outfile,out_aln_format[a]);
953                         
954                         output_format_aln (out_aln_format[a],A,EA,file_name);
955                         le=display_output_filename ( le, "MSA", out_aln_format[a], file_name, CHECK);
956                       }
957                   }
958                 return EXIT_SUCCESS;
959     }
960
961
962
963 Constraint_list * set_constraint_list4align_pdb (Constraint_list *CL,int seq, char *dp_mode, char *local_mode, char *param_file)
964 {
965   static Constraint_list *PWCL;
966   static Pdb_param *pdb_param; 
967   char **x;
968   int n;
969
970   if ( !CL)
971     {
972       free_constraint_list (PWCL);
973       return NULL;
974     }
975   else if ( !PWCL)
976     {
977       PWCL=declare_constraint_list ( CL->S,NULL, NULL, 0,NULL, NULL);
978
979       pdb_param=vcalloc ( 1, sizeof(Pdb_param));
980       pdb_param->N_ca=0;
981       pdb_param->max_delta=2.0;
982       pdb_param->maximum_distance=14;
983       declare_name (pdb_param->local_mode);
984       sprintf (pdb_param->local_mode, "%s", local_mode);
985       pdb_param->scale=50;
986
987       PWCL->pw_parameters_set=1;
988       PWCL->S=CL->S;
989       PWCL->lalign_n_top=10;
990       PWCL->sw_min_dist=10;
991       
992       PWCL->T=vcalloc ( (PWCL->S)->nseq, sizeof (Ca_trace*));
993       
994       PWCL->extend_jit=0;
995       PWCL->maximise=1;
996       /*PWCL->gop=-40;*/
997       PWCL->gop=-50;
998       PWCL->gep=-20;
999       sprintf (CL->matrix_for_aa_group, "vasiliky");
1000       PWCL->use_fragments=0;
1001       PWCL->ktup=0;
1002       PWCL->TG_MODE=1;
1003     }
1004   
1005    
1006    if ( param_file && check_file_exists ( param_file) )
1007         {
1008           if ( (x=get_parameter ( "-nca",              &n, param_file))!=NULL){pdb_param->N_ca=atoi(x[0]);free_char (x, -1);}
1009           if ( (x=get_parameter ( "-max_delta",        &n, param_file))!=NULL){pdb_param->max_delta=atof(x[0]);free_char (x, -1);}
1010           if ( (x=get_parameter ( "-maximum_distance", &n, param_file))!=NULL){pdb_param->maximum_distance=atoi(x[0]);free_char (x, -1);}
1011           if ( (x=get_parameter ( "-local_mode",       &n, param_file))!=NULL){sprintf (pdb_param->local_mode, "%s",x[0]);free_char (x, -1);}
1012           if ( (x=get_parameter ( "-scale",            &n, param_file))!=NULL){pdb_param->scale=atoi(x[0]);free_char (x, -1);}
1013           if ( (x=get_parameter ( "-gapopen", &n, param_file))!=NULL){PWCL->gop=atoi(x[0]);free_char (x, -1);}
1014           if ( (x=get_parameter ( "-gapext" , &n, param_file))!=NULL){PWCL->gep=atof(x[0]);free_char (x, -1);}
1015
1016         }
1017    
1018    
1019    
1020      
1021    sprintf ( PWCL->dp_mode, "%s", dp_mode);
1022    
1023    if (strm (PWCL->dp_mode, "lalign"))sprintf (PWCL->dp_mode,"sim_pair_wise_lalign");
1024    else if (strm (PWCL->dp_mode, "sw"))sprintf (PWCL->dp_mode,"gotoh_pair_wise_sw");
1025    
1026    local_mode=pdb_param->local_mode;
1027    if ( strm ( local_mode, "hasch_ca_trace_nb"))      PWCL->evaluate_residue_pair=evaluate_ca_trace_nb; 
1028    else if ( strm ( local_mode, "hasch_ca_trace_bubble")) PWCL->evaluate_residue_pair=evaluate_ca_trace_bubble;
1029    else if ( strm ( local_mode, "hasch_ca_trace_sap1_bubble")) PWCL->evaluate_residue_pair=evaluate_ca_trace_sap1_bubble;
1030    else if ( strm ( local_mode, "hasch_ca_trace_sap2_bubble")) PWCL->evaluate_residue_pair=evaluate_ca_trace_sap2_bubble;
1031    
1032    else if ( strm ( local_mode, "hasch_ca_trace_transversal")) PWCL->evaluate_residue_pair=evaluate_ca_trace_transversal;
1033    else if ( strm ( local_mode, "hasch_ca_trace_bubble_2")) PWCL->evaluate_residue_pair=evaluate_ca_trace_bubble_2;
1034    else if ( strm ( local_mode, "hasch_ca_trace_bubble_3")) PWCL->evaluate_residue_pair=evaluate_ca_trace_bubble_3;
1035    else if ( strm ( local_mode, "custom_pair_score_function1"))  PWCL->evaluate_residue_pair=custom_pair_score_function1;
1036    else if ( strm ( local_mode, "custom_pair_score_function2"))  PWCL->evaluate_residue_pair=custom_pair_score_function2;
1037    else if ( strm ( local_mode, "custom_pair_score_function3"))  PWCL->evaluate_residue_pair=custom_pair_score_function3;
1038    else if ( strm ( local_mode, "custom_pair_score_function4"))  PWCL->evaluate_residue_pair=custom_pair_score_function4;
1039    else if ( strm ( local_mode, "custom_pair_score_function5"))  PWCL->evaluate_residue_pair=custom_pair_score_function5;
1040    else if ( strm ( local_mode, "custom_pair_score_function6"))  PWCL->evaluate_residue_pair=custom_pair_score_function6;
1041    else if ( strm ( local_mode, "custom_pair_score_function7"))  PWCL->evaluate_residue_pair=custom_pair_score_function7;
1042    else if ( strm ( local_mode, "custom_pair_score_function8"))  PWCL->evaluate_residue_pair=custom_pair_score_function8;
1043    else if ( strm ( local_mode, "custom_pair_score_function9"))  PWCL->evaluate_residue_pair=custom_pair_score_function9;
1044    else if ( strm ( local_mode, "custom_pair_score_function10")) PWCL->evaluate_residue_pair=custom_pair_score_function10;
1045    
1046    
1047    else 
1048      {
1049        fprintf ( stderr, "\n%s is an unknown hasch mode, [FATAL]\n", local_mode);
1050        myexit (EXIT_FAILURE);
1051      }
1052    
1053    if ( PWCL->T[seq]);
1054    else
1055      {
1056        PWCL->T[seq]=read_ca_trace (is_pdb_struc((CL->S)->name[seq]), "ATOM");
1057        (PWCL->T[seq])->pdb_param=pdb_param;
1058        PWCL->T[seq]=trim_ca_trace (PWCL->T[seq], (CL->S)->seq[seq]);
1059        PWCL->T[seq]=hasch_ca_trace(PWCL->T[seq]);
1060        
1061      }
1062    
1063    
1064   return PWCL;
1065 }
1066
1067   
1068
1069 int evaluate_ca_trace_nb (Constraint_list *CL, int s1, int r1, int s2, int r2)
1070    {
1071     
1072      return (int)(neighborhood_match(CL, s1,r1, s2, r2, (CL->T[s1])->Chain,(CL->T[s2])->Chain ));
1073    }
1074 int evaluate_ca_trace_sap2_bubble (Constraint_list *CL, int s1, int r1, int s2, int r2)
1075      {
1076        
1077        
1078        
1079        return sap2_neighborhood_match (CL, s1, r1, s2, r2, (CL->T[s1])->Bubble,(CL->T[s2])->Bubble );
1080        
1081      }
1082 int evaluate_ca_trace_sap1_bubble (Constraint_list *CL, int s1, int r1, int s2, int r2)
1083      {
1084        /*
1085          Function documentation: start
1086          
1087          int evaluate_ca_trace_sap1_bubble (Constraint_list *CL, int s1, int s2, int r1, int r2)
1088          This function evaluates the cost for matching two residues:
1089          
1090          a1 is the cost for matching the two neighborood ( bubble type), using sap
1091          a1: [0,+100], +100 is the best possible match.
1092          a2 is the residue type weight:
1093             min=worst substitution value
1094             best=best of r1/r1, r2/r2-min
1095
1096             a2=(r1/r2 -min)/best --> a1:[0, 100]
1097          
1098          score=a1*a2-->[-inf, +10000];
1099        */
1100          
1101          
1102        
1103        float a1;
1104        
1105        
1106        a1=(int) sap1_neighborhood_match (CL, s1, r1, s2, r2, (CL->T[s1])->Bubble,(CL->T[s2])->Bubble );
1107        
1108        return (int)a1;
1109        
1110
1111      }
1112 int evaluate_ca_trace_bubble (Constraint_list *CL, int s1, int r1, int s2, int r2)
1113      {
1114        /*
1115          Function documentation: start
1116          
1117          int evaluate_ca_trace_bubble (Constraint_list *CL, int s1, int s2, int r1, int r2)
1118          This function evaluates the cost for matching two residues:
1119          
1120          a1 is the cost for matching the two neighborood ( bubble type)
1121          a1: [-inf,+100-scale], +100-scale is the best possible match.
1122          
1123        */
1124          
1125          
1126        
1127        float a1;
1128
1129        
1130             
1131        a1=(int) neighborhood_match (CL, s1, r1, s2, r2, (CL->T[s1])->Bubble,(CL->T[s2])->Bubble )-((CL->T[s1])->pdb_param)->scale;
1132       
1133               return a1;
1134        
1135
1136      }
1137 int evaluate_ca_trace_transversal (Constraint_list *CL, int s1, int r1, int s2, int r2)
1138      {
1139        return (int)(transversal_match (CL, s1, r1, s2, r2, (CL->T[s1])->Transversal,(CL->T[s2])->Transversal ));
1140      }
1141
1142 int evaluate_ca_trace_bubble_3 (Constraint_list *CL, int s1, int r1, int s2, int r2)
1143      {
1144        /*This Mode evaluates :
1145          
1146          1-The Bubble
1147          2-The Match of the transversal residues
1148        */
1149
1150        int a1, l1;
1151        int a2, l2;
1152        int a;
1153        
1154        l1=MAX(( (CL->T[s1])->Chain )->nb[r1][0] ,((CL->T[s2])->Chain )->nb[r2][0]);
1155        l2=MAX(( (CL->T[s1])->Bubble)->nb[r1][0], ((CL->T[s2])->Bubble)->nb[r2][0]);
1156        
1157        a1=(int)(neighborhood_match (CL, s1, r1, s2, r2, (CL->T[s1])->Bubble,(CL->T[s2])->Bubble ));
1158        a2=(int)(transversal_match (CL, s1, r1, s2, r2, (CL->T[s1])->Transversal,(CL->T[s2])->Transversal ));
1159        
1160        if ( !l1 && !l2)return 0;
1161        a=(a1+a2)/2;
1162        return a;
1163      }
1164 int evaluate_ca_trace_bubble_2 (Constraint_list *CL, int s1, int r1, int s2, int r2)
1165      {
1166        /*This Mode evaluates :
1167          1-The Ca neighborhood
1168          2-The Bubble
1169        */
1170
1171       
1172        return (int)((neighborhood_match (CL, s1, r1, s2, r2, (CL->T[s1])->Chain,(CL->T[s2])->Chain )));
1173      }
1174
1175
1176 /*********************************************************************************************/
1177 /*                                                                                           */
1178 /*         FUNCTIONS FOR COMPARING TWO NEIGHBORHOODS:START                                   */
1179 /*                                                                                           */
1180 /*********************************************************************************************/
1181 float matrix_match (Constraint_list *CL, int s1, int r1, int s2, int r2, Struct_nb *nbs1, Struct_nb *nbs2)
1182
1183      {
1184        /*
1185          Function documentation: start
1186          
1187          float matrix_match (Constraint_list *CL, int s1, int s2, int r1, int r2, Struct_nb *nbs1, Struct_nb *nbs2)
1188          This function evaluates the matrix for matching two residues:
1189         
1190             min=worst substitution value
1191             best=best of r1/r1, r2/r2-min
1192
1193             a2=(r1/r2 -min)/best --> a1:[0, 100]
1194          
1195          score=a1*a2-->[-inf, +10000];
1196        */
1197          
1198          
1199        
1200        float a2;
1201        float m1, m2, m;
1202        static float min=0;
1203        int a, b;
1204
1205        if ( !CL->M) 
1206          {
1207            CL->M=read_matrice ( "pam250mt");
1208            min=CL->M[0][0];
1209            for ( a=0; a< 26; a++)
1210              for ( b=0; b< 26; b++)min=MIN(min, CL->M[a][b]);
1211          }
1212
1213        if ( r1<=0 || r2<=0)return 0;
1214        m1=CL->M[(CL->S)->seq[s1][r1-1]-'A'][(CL->S)->seq[s1][r1-1]-'A']-min;
1215        m2=CL->M[(CL->S)->seq[s2][r2-1]-'A'][(CL->S)->seq[s2][r2-1]-'A']-min;
1216        m=MAX(m1, m2);
1217        a2=(CL->M[(CL->S)->seq[s1][r1-1]-'A'][(CL->S)->seq[s2][r2-1]-'A']-min)/m;
1218      
1219        return a2;
1220      }
1221
1222      
1223 float transversal_match (Constraint_list *CL, int s1, int r1, int s2, int r2, Struct_nb *nbs1, Struct_nb *nbs2)
1224       {
1225         int a, l1, l2;
1226         float score=0;
1227         float delta, max_delta;
1228         float max;
1229         Pdb_param*PP;
1230         
1231         PP=(CL->T[s1])->pdb_param;
1232         max_delta=PP->max_delta;
1233
1234         l1=nbs1->nb[r1][0];
1235         l2=nbs2->nb[r2][0];
1236
1237         if ( l1!=l2 || l1<(PP->N_ca)) return 0;
1238         
1239
1240         max=MAX(l1, l2)*max_delta;
1241         for ( delta=0,a=0; a< l2 ; a++)
1242                {
1243                    
1244                    delta+=max_delta-FABS((nbs1->d_nb[r1][a]-nbs2->d_nb[r2][a]));
1245                }
1246         score=(delta*100)/max;
1247        
1248
1249               
1250        return score;
1251       }
1252         
1253 float neighborhood_match (Constraint_list *CL, int s1, int r1, int s2, int r2, Struct_nb *nbs1, Struct_nb *nbs2)
1254       {
1255           static float **table;
1256           static int table_size;
1257           int a, b, l1, l2;
1258           float score=0;
1259           float ins, del, sub;
1260           float delta, max_delta;
1261           float max;
1262           Pdb_param*PP;
1263
1264        
1265        PP=(CL->T[s1])->pdb_param;
1266        max_delta=PP->max_delta;
1267
1268        
1269        if ( r1> 0 && r2 >0) {r1--; r2--;}
1270        else return 0;
1271        
1272        l1=nbs1->nb[r1][0];
1273        l2=nbs2->nb[r2][0];
1274
1275        if (table_size< (MAX(l1, l2)+1))
1276          {
1277          table_size=MAX(l1, l2)+1;
1278          if ( table)free_float (table, -1);
1279          table=NULL;
1280          }
1281        if ( !table) table=declare_float (table_size, table_size);
1282        
1283          
1284        max=MAX(l1, l2)*max_delta;
1285        if ( max==0)return 0;
1286
1287        
1288        table[0][0]=0;
1289        for ( b=1; b<=l2; b++)
1290            { 
1291            table[0][b]=0;
1292            }
1293        for ( a=1; a<=l1; a++)
1294            {
1295            table[a][0]=0;
1296            for ( b=1; b<=l2 ; b++)
1297                {
1298                    
1299                    delta=max_delta-FABS((nbs1->d_nb[r1][a]-nbs2->d_nb[r2][b]));
1300                                    
1301                    del=table[a-1][b];
1302                    ins=table[a][b-1];
1303                    sub= table[a-1][b-1]+delta;
1304
1305                    if ( del >= ins && del >= sub)score=del;
1306                    else if ( ins >= del && ins >= sub) score=ins;
1307                    else score=sub;                 
1308                    table[a][b]=score;
1309                }
1310            }
1311       
1312
1313        score=((((score)*100)/max));
1314        
1315               
1316        return score;
1317       }
1318
1319 float sap1_neighborhood_match (Constraint_list *CL, int s1, int r1, int s2, int r2, Struct_nb *nbs1, Struct_nb *nbs2)
1320       {
1321         /*
1322           Function documentation: start
1323           
1324           float sap1_neighborhood_match (Constraint_list *CL, int s1, int s2, int r1, int r2, Struct_nb *nbs1, Struct_nb *nbs2)
1325           This function is adapted from Taylor, Orengo, Protein Structure Alignment JMB 1989, (208)1-22
1326           It is the first function where 
1327           score= A/(|dra-drb|+b)
1328           
1329           Function documentation: end
1330         */
1331           
1332           static float **table;
1333           static int table_size;
1334           int a, b, l1, l2;
1335           float score=0;
1336           float ins, del, sub;
1337           float delta;
1338           float max;
1339
1340           int A=50;
1341           int B=5;
1342         
1343        
1344
1345        
1346
1347        
1348        if ( r1> 0 && r2 >0) {r1--; r2--;}
1349        else return 0;
1350        
1351        l1=nbs1->nb[r1][0];
1352        l2=nbs2->nb[r2][0];
1353
1354        if (table_size< (MAX(l1, l2)+1))
1355          {
1356          table_size=MAX(l1, l2)+1;
1357          if ( table)free_float (table, -1);
1358          table=NULL;
1359          }
1360        if ( !table) table=declare_float (table_size, table_size);
1361        
1362          
1363        max=MAX(l1, l2)*(A/B);
1364        if ( max==0)return 0;
1365
1366        
1367        table[0][0]=0;
1368        for ( b=1; b<=l2; b++)
1369            { 
1370            table[0][b]=0;
1371            }
1372        for ( a=1; a<=l1; a++)
1373            {
1374            table[a][0]=0;
1375            for ( b=1; b<=l2 ; b++)
1376                {
1377         
1378                    delta=A/(FABS((nbs1->d_nb[r1][a]-nbs2->d_nb[r2][b]))+B);
1379         
1380                    del=table[a-1][b];
1381                    ins=table[a][b-1];
1382                    sub= table[a-1][b-1]+delta;
1383                    if ( del >= ins && del >= sub)score=del;
1384                    else if ( ins >= del && ins >= sub) score=ins;
1385                    else score=sub;                 
1386                    table[a][b]=score;
1387                }
1388            }
1389       
1390
1391        score=((score*100))/(max);
1392        
1393               
1394        return score;
1395       }
1396
1397 float sap2_neighborhood_match (Constraint_list *CL, int s1, int r1, int s2, int r2, Struct_nb *nbs1, Struct_nb *nbs2)
1398       {
1399         /*
1400           Function documentation: start
1401           
1402           float sap1_neighborhood_match (Constraint_list *CL, int s1, int s2, int r1, int r2, Struct_nb *nbs1, Struct_nb *nbs2)
1403           This function is adapted from Taylor, Orengo, Protein Structure Alignment JMB 1989, (208)1-22
1404           It is the first function where 
1405           score= A/(|dra-drb|+b)
1406           
1407           Function documentation: end
1408         */
1409           
1410           static float **table;
1411           static int table_size;
1412           int a, b, l1, l2;
1413           float score=0;
1414           float ins, del, sub;
1415           float delta;
1416           float max;
1417
1418           Amino_acid **pep1;
1419           Amino_acid **pep2;
1420           static Atom *vX_1, *vY_1, *vZ_1;
1421           static Atom *vX_2, *vY_2, *vZ_2;
1422           static Atom *ca1, *ca2;
1423           float val;
1424  
1425           int A=50;
1426           int B=2;
1427           
1428        
1429
1430
1431        if ( r1> 0 && r2 >0) {r1--; r2--;}
1432        else return 0;
1433
1434        /*Make up the referencial*/
1435        pep1=(CL->T[s1])->peptide_chain;
1436        pep2=(CL->T[s2])->peptide_chain;
1437
1438        /*Get Referencial for CA1*/ 
1439        if ( (pep1[r1])->C)vX_1 =diff_atom(pep1[r1]->C,pep1[r1]->CA, vX_1);
1440        if ( (pep1[r1])->N)vY_1 =diff_atom(pep1[r1]->N,pep1[r1]->CA, vY_1);
1441        if ( (pep1[r1])->CB)vZ_1=diff_atom(pep1[r1]->CB,(pep1[r1])->CA,vZ_1);
1442        else vZ_1=add_atom (vX_1, vY_1, vZ_1); 
1443
1444        
1445       
1446       
1447
1448        /*Get Referencial for CA2*/ 
1449        if ( (pep2[r2])->C)vX_2 =diff_atom((pep2[r2])->C,(pep2[r2])->CA, vX_2);
1450        if ( (pep2[r2])->N)vY_2 =diff_atom((pep2[r2])->N,(pep2[r2])->CA, vY_2);
1451        if ( (pep2[r2])->CB)vZ_2=diff_atom((pep2[r2])->CB,(pep2[r2])->CA, vZ_2);
1452        else vZ_2=add_atom (vX_2, vY_2, vZ_2); 
1453        
1454       
1455       
1456
1457        /*END OF GETTING REFERENCIAL*/ 
1458          
1459        /*Test
1460        if ( r1>1 && r2>1)
1461          {
1462          fprintf (stdout,"\n\t*******");
1463          
1464          fprintf (stdout, "RESIDUE %d %c", r1, (CL->S)->seq[s1][r1]);
1465          if ( (pep1[r1])->CA)fprintf (stdout,"\n\tCA ");print_atom (pep1[r1]->CA );
1466          if ( (pep1[r1])->C)fprintf (stdout,"\n\tC  ");print_atom (pep1[r1]->C );
1467          if ( (pep1[r1])->N)fprintf (stdout,"\n\tN  ");print_atom (pep1[r1]->N );
1468          if ( (pep1[r1])->CB)fprintf (stdout,"\n\tCB ");print_atom (pep1[r1]->CB );
1469          fprintf (stdout,"\n\t*******");
1470          fprintf (stdout,"\n\tvX ");print_atom ( vX_1);
1471          fprintf (stdout,"\n\tvY ");print_atom ( vY_1);
1472          fprintf (stdout,"\n\tvZ ");print_atom ( vZ_1);
1473
1474          ca1= copy_atom ((pep1[r1-1])->CA, ca1);
1475          ca1 =diff_atom(ca1,(pep1[r1])->CA, ca1);
1476          fprintf (stdout,"\n\tca ");print_atom ( ca1);
1477          fprintf ( stdout, "\n\tSQ1=%d ", (int)square_atom(ca1));
1478          ca1=reframe_atom(vX_1, vY_1, vZ_1, ca1, ca1);
1479          fprintf ( stdout, "\n\tSQ2=%d ", (int)square_atom(ca1));
1480          fprintf (stdout,"\n\tca ");print_atom ( ca1);
1481          fprintf (stdout,"\n\n");
1482          }
1483        */
1484
1485        l1=nbs1->nb[r1][0];
1486        l2=nbs2->nb[r2][0];
1487
1488        if (table_size< (MAX(l1, l2)+1))
1489          {
1490          table_size=MAX(l1, l2)+1;
1491          if ( table)free_float (table, -1);
1492          table=NULL;
1493          }
1494        if ( !table) table=declare_float (table_size, table_size);
1495        
1496          
1497        max=MAX(l1, l2)*(A/B);
1498       
1499        if ( max==0)return 0;
1500        
1501        
1502        table[0][0]=0;
1503        for ( b=1; b<=l2; b++)
1504            { 
1505            table[0][b]=0;
1506            }
1507
1508        for ( a=1; a<=l1; a++)
1509            {
1510            ca1=copy_atom ((CL->T[s1])->structure[nbs1->nb[r1][a]], ca1);
1511            ca1=diff_atom(ca1,(pep1[r1])->CA, ca1);
1512            ca1=reframe_atom(vX_1, vY_1, vZ_1, ca1, ca1);
1513            
1514            table[a][0]=0;
1515            for ( b=1; b<=l2 ; b++)
1516                {
1517                   ca2  =copy_atom((CL->T[s2])->structure[nbs2->nb[r2][b]], ca2);
1518                   ca2  =diff_atom(ca2,(pep2[r2])->CA, ca2);
1519                   ca2  =reframe_atom(vX_2, vY_2, vZ_2, ca2, ca2);
1520                   
1521                   ca2=diff_atom(ca2,ca1,ca2);
1522                   val=square_atom (ca2);
1523                   
1524                   val=(float)sqrt ((double)val);
1525                   
1526                   delta=A/(val+B);
1527                   
1528
1529                   del=table[a-1][b];
1530                   ins=table[a][b-1];
1531                   sub= table[a-1][b-1]+delta;
1532
1533                   if ( del >= ins && del >= sub)score=del;
1534                   else if ( ins >= del && ins >= sub) score=ins;
1535                   else score=sub;                  
1536                   table[a][b]=score;
1537                }
1538            }
1539       
1540
1541        score=(((score*100))/(max)-50);
1542        
1543               
1544        return score;
1545       }
1546
1547 /*********************************************************************************************/
1548 /*                                                                                           */
1549 /*         APDB                                                                              */
1550 /*                                                                                           */
1551 /*********************************************************************************************/
1552 float **** irmsdmin_apdb_filtration ( Alignment *A, float ****residues, Constraint_list *CL, Pdb_param *PP, FILE *fp)
1553 {
1554   int s1, s2, a,col1, n,n2=0, t,flag;
1555   int **pos, **list;
1556   float nirmsd, min_nirmsd,max_nirmsd,ref_sum, sum, sum2;
1557   float **normalized_len;
1558   
1559   normalized_len=declare_float (A->nseq+1, A->nseq+1);
1560   for (s1=0; s1<A->nseq; s1++)
1561     {
1562       int l1, l2, r1, r2, p;
1563       for (s2=0; s2<A->nseq; s2++)
1564         {
1565           for ( l1=l2=p=0; p< A->len_aln; p++)
1566             {
1567               r1=A->seq_al[s1][p];
1568               r2=A->seq_al[s2][p];
1569               if (!is_gap(r1) && isupper(r1))l1++;
1570               if (!is_gap(r2) && isupper(r2))l2++;
1571             }
1572           normalized_len[s1][s2]=MIN(l1,l2);
1573         }
1574     }
1575   
1576   pos=aln2pos_simple (A, A->nseq);
1577   for ( s1=0; s1< A->nseq; s1++)
1578     for ( s2=0; s2<A->nseq; s2++)
1579       {
1580         if ( s1==s2) continue;
1581         else if (!(CL->T[A->order[s1][0]]) || !(CL->T[A->order[s2][0]]))continue; 
1582
1583         list=declare_int (A->len_aln, 2); 
1584         
1585         for ( sum=0,n=0,col1=0; col1< A->len_aln; col1++)
1586           {
1587             if ( islower (A->seq_al[s1][col1]) || islower ( A->seq_al[s2][col1]))continue;
1588             else if ( pos[s1][col1]<=0 || pos[s2][col1]<=0 ) continue;
1589             else if ( residues[s1][s2][pos[s1][col1]-1][0]==0)continue; 
1590             
1591             list[n][0]=pos[s1][col1]-1;
1592             list[n][1]=(int)100000*residues[s1][s2][pos[s1][col1]-1][4];
1593             sum2+=residues[s1][s2][pos[s1][col1]-1][4];
1594             n++;
1595           }
1596
1597         if (n==0)return residues;
1598         
1599         sort_int_inv (list, 2, 1,0, n-1);
1600         for (sum=0,a=0; a<n; a++)
1601           {
1602             sum+=list[a][1];
1603           }
1604         ref_sum=sum;
1605         nirmsd=min_nirmsd=max_nirmsd=sum/(n*n);
1606         t=0;
1607         
1608
1609         /*1 Find the maximum*/
1610         sum=ref_sum;
1611         for (flag=0,a=0; a< n-1; a++)
1612           {
1613             sum-=list[a][1];
1614             nirmsd=sum/((n-(a+1))*(n-(a+1)));
1615             if (nirmsd<max_nirmsd)flag=1;
1616             if ((nirmsd>=max_nirmsd) && flag==1)break;
1617             n2=a;
1618           }
1619         
1620         sum=ref_sum;
1621         for (a=0; a<n2-1; a++)
1622           {
1623             sum-=list[a][1];
1624             nirmsd=sum/((n-(a+1))*(n-(a+1)));
1625             
1626             
1627             if ( nirmsd<min_nirmsd)
1628               {
1629                 min_nirmsd=nirmsd;
1630                 t=a;
1631                 if ( PP->nirmsd_graph)
1632                   {
1633                     fprintf ( stdout, "\n_NIRMSD_GRAPH %s %s POS: %4d Removed: %4d NiRMSD: %.2f", A->name[s1], A->name[s2], list[a][0],a,(nirmsd/100000)*normalized_len[s1][s2]);
1634                   }
1635               }
1636           }
1637
1638         if ( PP->print_rapdb)
1639           {
1640             for ( a=0; a<n; a++)
1641               {
1642                 if      ( list[a][1]>0 && a<=t)fprintf ( stdout, "\nRAPDB QUANTILE REMOVE S1: %3d S2: %3d COL: %3d SCORE*100: %d", s1, s2, list[a][0], list[a][1]);
1643                 else if ( list[a][1]>0 && a>t)fprintf ( stdout, "\nRAPDB QUANTILE KEEP   S1: %3d S2: %3d COL: %3d SCORE*100: %d", s1, s2, list[a][0], list[a][1]);
1644               }
1645           }
1646
1647         fprintf ( stdout, "\n# MINIMISATION FILTER ON: NiRMSD minimsation resulted in the removal of %d [out of %d] Columns On the alignment %s Vs %s\n", t, n, A->name[s1], A->name[s2]);
1648         for ( a=0; a<=t; a++)
1649           {
1650
1651             residues[s1][s2][list[a][0]][0]=0;
1652             residues[s1][s2][list[a][0]][1]=0;
1653             residues[s1][s2][list[a][0]][2]=0;
1654             residues[s1][s2][list[a][0]][3]=0;
1655             residues[s1][s2][list[a][0]][4]=-1;
1656             
1657           }
1658         
1659         free_int (list, -1);
1660       }
1661   free_float (normalized_len, -1);
1662   return residues;
1663 }
1664 float **** quantile_apdb_filtration ( Alignment *A, float ****residues, Constraint_list *CL, Pdb_param *PP,FILE *fp)
1665 {
1666   int s1, s2, a,col1, n, t;
1667   int **pos, **list;
1668   
1669   pos=aln2pos_simple (A, A->nseq);
1670   for ( s1=0; s1< A->nseq; s1++)
1671     for ( s2=0; s2<A->nseq; s2++)
1672       {
1673         if ( s1==s2) continue;
1674         else if (!(CL->T[A->order[s1][0]]) || !(CL->T[A->order[s2][0]]))continue; 
1675
1676         list=declare_int (A->len_aln, 2); 
1677         
1678         for ( n=0,col1=0; col1< A->len_aln; col1++)
1679           {
1680             if ( islower (A->seq_al[s1][col1]) || islower ( A->seq_al[s2][col1]))continue;
1681             else if ( pos[s1][col1]<=0 || pos[s2][col1]<=0 ) continue;
1682                     
1683             list[n][0]=pos[s1][col1]-1;
1684             list[n][1]=(int)100*residues[s1][s2][pos[s1][col1]-1][4];
1685             n++;
1686             
1687           }
1688         
1689         sort_int_inv (list, 2, 1,0, n-1);
1690         
1691         t=quantile_rank ( list,1, n,PP->filter);
1692         
1693         if ( PP->print_rapdb)
1694           {
1695             for ( a=0; a<n; a++)
1696               {
1697                 if      ( list[a][1]>0 && a<t)fprintf ( stdout, "\nRAPDB QUANTILE REMOVE S1: %3d S2: %3d COL: %3d SCORE*100: %d", s1, s2, list[a][0], list[a][1]);
1698                 else if ( list[a][1]>0 && a>t)fprintf ( stdout, "\nRAPDB QUANTILE KEEP   S1: %3d S2: %3d COL: %3d SCORE*100: %d", s1, s2, list[a][0], list[a][1]);
1699               }
1700           }
1701                                             
1702         for ( a=0; a<t; a++)
1703           {
1704
1705             residues[s1][s2][list[a][0]][0]=0;
1706             residues[s1][s2][list[a][0]][1]=0;
1707             residues[s1][s2][list[a][0]][2]=0;
1708             residues[s1][s2][list[a][0]][3]=0;
1709             residues[s1][s2][list[a][0]][4]=-1;
1710             
1711           }
1712         
1713         free_int (list, -1);
1714       }
1715         
1716   return residues;
1717 }
1718 Alignment * analyse_pdb ( Alignment *A, Alignment *ST, char *results)
1719       {
1720         int s1,s2,r1, r2,b, p;
1721         int **pos;
1722         float **normalize_len;
1723         float m2, m4;
1724         float pair_tot=0, pair_m1, pair_m2, pair_m3, pair_m4, pair_m5, pair_len=0;
1725         float seq_tot, seq_m1, seq_m2, seq_m3, seq_m4, seq_m5,seq_len;
1726         float msa_tot, msa_m1, msa_m2, msa_m3, msa_m4, msa_m5, msa_len;
1727         float iRMSD_unit, iRMSD_max, iRMSD_min;
1728         float ****residues;
1729         Pdb_param *PP=NULL;
1730         Constraint_list *CL;
1731         char *average_file, *pairwise_file, *total_file, *irmsd_file=0;
1732         FILE *fp, *average,*pairwise, *total, *irmsd_graph=0;
1733         
1734         
1735         fp      =vfopen ( results, "w");
1736         pairwise=vfopen ((pairwise_file=vtmpnam (NULL)),"w");
1737         average =vfopen ((average_file =vtmpnam (NULL)),"w");
1738         total   =vfopen ((total_file   =vtmpnam (NULL)),"w");
1739
1740         
1741         CL=A->CL;
1742         
1743         for ( s1=0; s1< (A->S)->nseq; s1++)
1744           if ( CL->T[s1]){PP=(CL->T[s1])->pdb_param;break;}
1745
1746         if (PP->irmsd_graph)irmsd_graph   =vfopen ((irmsd_file =vtmpnam (NULL)),"w");
1747         
1748         fprintf ( fp, "\nAPDB_RESULT_FORMAT_02\n");
1749         residues=analyse_pdb_residues ( A, A->CL,PP);
1750         if ( PP->filter>=0)residues=quantile_apdb_filtration (A, residues, A->CL,PP, fp);
1751         else if ( PP->filter<0)residues=irmsdmin_apdb_filtration (A, residues, A->CL,PP, fp);
1752         
1753         pos=aln2pos_simple (A, A->nseq);
1754         
1755
1756
1757         
1758         
1759         /*Compute the alignment length for normalization*/
1760           normalize_len=declare_float (A->nseq+1, A->nseq+1);
1761           for (s1=0; s1<A->nseq; s1++)
1762             {
1763               int l1, l2, r1, r2;
1764               for (s2=0; s2<A->nseq; s2++)
1765                 {
1766                   for ( l1=l2=p=0; p< A->len_aln; p++)
1767                     {
1768                       r1=A->seq_al[s1][p];
1769                       r2=A->seq_al[s2][p];
1770                       if (!is_gap(r1) && isupper(r1))l1++;
1771                       if (!is_gap(r2) && isupper(r2))l2++;
1772                     }
1773                   normalize_len[s1][s2]=MIN(l1,l2);
1774                 }
1775             }
1776
1777           msa_len=msa_tot=msa_m1=msa_m2=msa_m3=msa_m4=msa_m5=0;
1778
1779           for ( s1=0; s1< A->nseq; s1++)
1780             {
1781               if ( !(CL->T[A->order[s1][0]]))continue;
1782               seq_len=seq_tot=seq_m1=seq_m2=seq_m3=seq_m4=seq_m5=0;
1783               for ( s2=0; s2< A->nseq; s2++)
1784                 {
1785                   if ( s1==s2)continue;
1786                   if ( !(CL->T[A->order[s2][0]]))continue;
1787                   pair_tot=pair_m1=pair_m2=pair_m3=pair_m4=pair_m5=0;
1788                   for ( p=0; p< A->len_aln; p++)
1789                     {
1790                       r1=A->seq_al[s1][p];
1791                       r2=A->seq_al[s2][p];
1792                       b=pos[s1][p]-1;
1793
1794
1795                       if (PP->filter_aln)
1796                         {
1797                            if (is_gap(r1) || is_gap(r2) || residues[s1][s2][b][0]==0)
1798                              {
1799                                A->seq_al[s1][p]=tolower(r1);
1800                                A->seq_al[s2][p]=tolower(r2);
1801                              }
1802                            else
1803                              {
1804                                 A->seq_al[s1][p]=toupper(r1);
1805                                 A->seq_al[s2][p]=toupper(r2);
1806                              }
1807                            
1808                         }
1809                       
1810                       if ( PP->irmsd_graph && ( is_gap(r1) || is_gap(r2) || residues[s1][s2][b][0]==0))
1811                         {
1812
1813                           fprintf ( irmsd_graph, "\n_IRMSD_GRAPH %10s %10s ALN: %c%c iRMSD: -1.00", A->name[s1], A->name[s2],A->seq_al[s1][p], A->seq_al[s2][p]);
1814                         }
1815                       
1816                       if (is_gap(r1) || is_gap(r2) || residues[s1][s2][b][0]==0)continue; 
1817                       pair_tot++;
1818                 
1819                       /*APDB*/
1820                       m2=(residues[s1][s2][b][2]*100)/residues[s1][s2][b][0];
1821                       if (m2>PP->similarity_threshold){pair_m3++;}
1822                       
1823                       /*iRMSD*/
1824                       
1825                       m4=residues[s1][s2][b][4];
1826                       
1827                       if ( PP->irmsd_graph )
1828                         {
1829                           fprintf ( irmsd_graph, "\nIRMSD_GRAPH %10s %10s ALN: %c%c iRMSD: %.2f", A->name[s1], A->name[s2],A->seq_al[s1][p], A->seq_al[s2][p], m4); 
1830                           }
1831                       pair_m4+=m4;
1832                     }
1833                   pair_len=normalize_len[s1][s2];
1834                   if ( s1>s2)
1835                     {
1836
1837                       fprintf ( pairwise, "\n\n#PAIRWISE: %s Vs %s",A->name[s1], A->name[s2]);
1838                       fprintf ( pairwise, "\n\tPAIRWISE EVALUATED: %6.2f %%    [%s Vs %s] ",  (pair_len==0)?-1:(pair_tot*100)/pair_len,A->name[s1], A->name[s2]);
1839                       fprintf ( pairwise, "\n\tPAIRWISE APDB:      %6.2f %%    [%s Vs %s] ",  (pair_tot==0)?-1:(pair_m3*100)/pair_tot,A->name[s1], A->name[s2]);
1840                       fprintf ( pairwise, "\n\tPAIRWISE iRMSD:     %6.2f Angs [%s Vs %s]",  (pair_tot==0)?-1:pair_m4/pair_tot,A->name[s1], A->name[s2]);
1841                       fprintf ( pairwise, "\n\tPAIRWISE NiRMSD:    %6.2f Angs [%s Vs %s] [%d pos]", (pair_tot==0)?-1:(pair_m4*pair_len)/(pair_tot*pair_tot), A->name[s1], A->name[s2], (int)pair_tot);
1842                       fprintf ( pairwise, "\n\tRAPDB PAIRS PAIRWISE N_NONEMPTY_PAIRS %d N_MAXIMUM_PAIRS %d",(int) pair_tot, (int)pair_len);
1843                       msa_m3+=pair_m3;
1844                        msa_m4+=pair_m4;
1845                        msa_tot+=pair_tot;
1846                        msa_len+=pair_len;
1847                     }
1848                   seq_m3+=pair_m3;
1849                   seq_m4+=pair_m4;
1850                   seq_tot+=pair_tot;
1851                   seq_len+=pair_len;
1852                   
1853                 }
1854               
1855               fprintf ( average, "\n\n#AVERAGE For Sequence %s", A->name[s1]);
1856               fprintf ( average, "\n\tAVERAGE  EVALUATED: %6.2f %%    [%s]", (seq_len==0)?-1:(seq_tot*100)/seq_len, A->name[s1]);
1857               fprintf ( average, "\n\tAVERAGE  APDB:      %6.2f %%    [%s]", (seq_tot==0)?-1:(seq_m3*100)/seq_tot, A->name[s1]);
1858               fprintf ( average, "\n\tAVERAGE  iRMSD:     %6.2f Angs [%s]", (seq_tot==0)?-1:seq_m4/seq_tot, A->name[s1]);
1859               fprintf ( average, "\n\tAVERAGE  NiRMS:     %6.2f Angs [%s]", (seq_tot==0)?-1:(seq_m4*seq_len)/(seq_tot*seq_tot), A->name[s1]);
1860               if ( strm (PP->color_mode, "apdb"))ST->score_seq[s1]=(seq_tot==0)?-1:(seq_m3*100)/pair_tot;
1861               if (PP->print_rapdb)fprintf (average, "\n\tRAPDB PAIRS AVERAGE N_NONEMPTY_PAIRS %d N_MAXIMUM_PAIRS %d", (int)pair_tot, (int)pair_len);
1862               
1863               if ( strm (PP->color_mode, "irmsd"))ST->score_seq[s1]=(seq_tot==0)?-1:10*((seq_m4*pair_len)/(seq_tot*seq_tot));
1864              
1865             }
1866           fprintf ( total, "\n\n#TOTAL for the Full MSA");
1867           fprintf ( total, "\n\tTOTAL     EVALUATED: %6.2f %%  ", (msa_len==0)?-1:(msa_tot*100)/msa_len);
1868           fprintf ( total, "\n\tTOTAL     APDB:      %6.2f %%  ", (msa_tot==0)?-1:(msa_m3*100)/msa_tot);
1869           fprintf ( total, "\n\tTOTAL     iRMSD:     %6.2f Angs", (msa_tot==0)?-1:msa_m4/msa_tot);
1870           fprintf ( total, "\n\tTOTAL     NiRMSD:    %6.2f Angs", (msa_tot==0)?-1:(msa_m4*msa_len)/(msa_tot*msa_tot));
1871           if (PP->print_rapdb)fprintf (total, "\n\tRAPDB PAIRS TOTAL N_NONEMPTY_PAIRS: %d N_MAXIMUM_PAIRS %d", (int)msa_tot, (int)msa_len);
1872           
1873           if ( strm (PP->color_mode, "apdb")) ST->score_aln=ST->score=A->score_aln=A->score=(msa_tot==0)?-1:(msa_m3*100)/msa_tot;
1874           if ( strm (PP->color_mode, "irmsd"))ST->score_aln=ST->score=A->score_aln=A->score=(msa_tot==0)?-1:10*((msa_m4*msa_len)/(msa_tot*msa_tot));
1875         
1876           vfclose (average);vfclose (total); vfclose (pairwise);if (PP->irmsd_graph)vfclose (irmsd_graph);
1877           fp=display_file_content (fp, pairwise_file);
1878           fp=display_file_content (fp, average_file);
1879           fp=display_file_content (fp, total_file);
1880           if ( PP->irmsd_graph)fp=display_file_content (fp, irmsd_file);
1881           
1882           fprintf ( fp, "\n\n# EVALUATED: Fraction of Pairwise Columns Evaluated\n");
1883           fprintf ( fp, "# APDB:      Fraction of Correct Columns according to APDB\n");
1884           fprintf ( fp, "# iRMDS:     Average iRMSD over all evaluated columns\n");
1885           fprintf ( fp, "# NiRMDS:    iRMSD*MIN(L1,L2)/Number Evaluated Columns\n");
1886           fprintf ( fp, "# Main Parameter: -maximum_distance %.2f Angstrom\n", PP->maximum_distance);
1887           
1888           fprintf ( fp, "# Undefined values are set to -1 and indicate LOW Alignment Quality\n");
1889           fp=print_program_information (fp, NULL);
1890           
1891           
1892           
1893           
1894           /*Color Output*/
1895           for (iRMSD_max=0,iRMSD_min=10000,s1=0; s1<A->nseq; s1++)
1896             for ( s2=0; s2< A->nseq; s2++)
1897               for (p=0; p<A->len_aln; p++)
1898                 {
1899                   if ( residues[s1][s2][p][4]>0)
1900                     {
1901                     iRMSD_max=MAX(iRMSD_max, residues[s1][s2][p][4]);
1902                     iRMSD_min=MAX(iRMSD_min, residues[s1][s2][p][4]);
1903                     }
1904                     
1905                 }
1906           iRMSD_unit=iRMSD_max/8;
1907           
1908           for (p=0; p< A->len_aln; p++)
1909             for ( s1=0; s1< A->nseq; s1++)
1910               {
1911                 
1912                 for ( p=0; p< A->len_aln; p++)
1913                   {
1914                     r1=A->seq_al[s1][p];
1915                     b=pos[s1][p]-1;
1916                     if ( is_gap(r1) ||  !(CL->T[A->order[s1][0]]))
1917                       ST->seq_al[s1][p]=NO_COLOR_RESIDUE;
1918                     else
1919                       {
1920                         float tot_m2=0, tot_m4=0, v=0;
1921                         seq_m2=seq_m4=0;
1922                         
1923                         for (s2=0; s2< A->nseq; s2++)
1924                           {
1925                             r2=A->seq_al[s1][p];
1926                             if ( s1==s2) continue;
1927                             if (is_gap(r2) || !(CL->T[A->order[s1][0]]) || residues[s1][s2][b][0]==0)continue;
1928
1929                             seq_m2+=m2=(residues[s1][s2][b][2]*100)/residues[s1][s2][b][0];
1930                             tot_m2++;
1931                             
1932                             m4=residues[s1][s2][b][4];
1933                             if (m4>=0)
1934                               {
1935                                 seq_m4+=m4;
1936                                 tot_m4++;
1937                               }
1938                           }
1939                         
1940                         if (strm ( PP->color_mode, "apdb"))
1941                           {
1942                             if (tot_m2==0)v=NO_COLOR_RESIDUE;
1943                             else v=MIN((seq_m2/(10*tot_m2)),9);
1944                           }
1945                         else if ( strm (PP->color_mode, "irmsd"))
1946                           {
1947                             if ( tot_m4==0)v=NO_COLOR_RESIDUE;
1948                             else v=(8-(int)((seq_m4/(iRMSD_unit*tot_m4))))+1;
1949                           }
1950                         ST->seq_al[s1][p]=v;
1951                         
1952                       }
1953                   }
1954               }
1955           for ( p=0; p<A->len_aln; p++) ST->seq_al[A->nseq][p]=NO_COLOR_RESIDUE;
1956           
1957
1958           ST->generic_comment=vcalloc ( 100, sizeof (int));
1959           if ( strm (PP->color_mode, "apdb"))
1960             {
1961               sprintf ( ST->generic_comment, "# APDB Evaluation: Color Range Blue-[0 %% -- 100 %%]-Red\n# Sequence Score: APDB\n# Local Score: APDB\n\n");
1962             }
1963           else if ( strm (PP->color_mode, "irmsd"))
1964             {
1965               sprintf ( ST->generic_comment, "\n# iRMSD Evaluation:\n# Sequence score: NiRMSD (Angstrom*10)\n# Local Score: iRMSD, Blue-[%.2f Ang. -- 0.00 Ang.]-Red \n", iRMSD_max); 
1966             }
1967
1968           fprintf ( fp, "\n");
1969           vfclose (fp);
1970           free_int (pos, -1);
1971           return ST;
1972       }
1973 float **** analyse_pdb_residues ( Alignment *A, Constraint_list *CL, Pdb_param *pdb_param)
1974      {
1975
1976          int **pos;
1977          int s1, s2, rs1, rs2;
1978          int col1, col2;
1979          float ****distances;
1980          
1981               /*Distances[Nseq][len_aln][4]
1982                 distances...[0]: Number of residues within the bubble
1983                 distances...[1]: Absolute difference of distance of residues within Bubble
1984                 distances...[2]: Number of residues within the bubble with Delta dist < md_threshold
1985                 distances ..[3]: Sum of squared difference of distances
1986                 distances ..[4]: iRMSD
1987               */
1988          float d1, d2,delta;
1989          int wd1, wd2;
1990          int in_bubble=0;
1991          int real_res1_col1=0;
1992          int real_res1_col2;
1993          int real_res2_col1;
1994          int real_res2_col2;
1995          Pdb_param *PP;
1996          int print_rapdb;
1997          float nrapdb, rapdb;
1998          Alignment *BA=NULL;
1999
2000          PP=pdb_param;
2001          print_rapdb=PP->print_rapdb;
2002          
2003          distances=declare_arrayN(4, sizeof (float), A->nseq, A->nseq, 0, 0);
2004          
2005          /*Pre-computation of the internal distances----> T[seq]->ca_dist[len][len]*/
2006          /*Can be avoided if distance_on_request set to 1 */
2007
2008          for ( s1=0; s1< A->nseq; s1++)
2009            {
2010              rs1=A->order[s1][0];
2011              if (CL->T[rs1] &&  !(CL->T[rs1])->ca_dist)(CL->T[rs1])->ca_dist=measure_ca_distances(CL->T[rs1]);
2012              for ( s2=0; s2< A->nseq; s2++)
2013                {
2014                  distances[s1][s2]=declare_float ( A->len_aln, 6);
2015                }
2016            }
2017          pos=aln2pos_simple (A, A->nseq);
2018                  
2019          for ( s1=0; s1< A->nseq; s1++)
2020            for ( col1=0; col1< A->len_aln; col1++)
2021              for ( s2=0; s2<A->nseq; s2++)
2022                {
2023                  rs1=A->order[s1][0];
2024                  rs2=A->order[s2][0];
2025                  rapdb=0;
2026                  nrapdb=0;
2027                  if ( s1==s2) continue;
2028                  else if (!(CL->T[rs1]) || !(CL->T[rs2]))continue; 
2029                  else if ( islower (A->seq_al[s1][col1]) || islower ( A->seq_al[s2][col1]))continue;
2030                  else if ( pos[s1][col1]<=0 || pos[s2][col1]<=0 ) continue;
2031                  
2032                  if ( print_rapdb && s2>s1)
2033                    {
2034                      
2035                      fprintf ( stdout, "RAPDB S1: %s S2: %s POS %d %d %c %d %c ", A->name[s1], A->name[s2], col1+1, pos[s1][col1],A->seq_al[s1][col1], pos[s2][col1],A->seq_al[s2][col1]);
2036                      BA=copy_aln (A, BA);
2037                      lower_string (BA->seq_al[s1]);
2038                      lower_string (BA->seq_al[s2]);
2039                      BA->seq_al[s1][col1]=toupper (BA->seq_al[s1][col1]);
2040                      BA->seq_al[s2][col1]=toupper (BA->seq_al[s2][col1]);
2041                    }
2042                  
2043                  for ( col2=0; col2<A->len_aln; col2++)
2044                    {
2045         
2046                      if (pos[s1][col2]<=0 || pos[s2][col2]<=0 )continue;
2047                      else if ( FABS((pos[s1][col2]-pos[s1][col1]))<=PP->n_excluded_nb)continue;
2048                      else if ( FABS((pos[s2][col2]-pos[s2][col1]))<=PP->n_excluded_nb)continue;
2049                      else if ( islower (A->seq_al[s1][col2]) || islower ( A->seq_al[s2][col2]))continue;
2050                      
2051                      real_res1_col1=pos[s1][col1]-1;
2052                      real_res1_col2=pos[s1][col2]-1;
2053
2054                      real_res2_col1=pos[s2][col1]-1;
2055                      real_res2_col2=pos[s2][col2]-1;
2056                      
2057                      d1=(CL->T[rs1])->ca_dist[real_res1_col1][real_res1_col2];
2058                      d2=(CL->T[rs2])->ca_dist[real_res2_col1][real_res2_col2];
2059                      
2060                      if ( d1==UNDEFINED || d2 == UNDEFINED) continue;
2061                      
2062                      
2063                      
2064                      if ( strm ( PP->local_mode, "sphere"))
2065                        {
2066                          in_bubble= (d1<PP->maximum_distance && d2<PP->maximum_distance)?1:0;              ;               
2067                        }
2068                      else if ( strm ( PP->local_mode, "window"))
2069                        {
2070                          wd1=FABS((pos[s1][col2]-pos[s1][col1]));
2071                          wd2=FABS((pos[s2][col2]-pos[s2][col1]));
2072                          in_bubble= (wd1<PP->maximum_distance && wd2<PP->maximum_distance)?1:0;            ;               
2073                        } 
2074
2075                      if (in_bubble)
2076                        {
2077                          if ( print_rapdb && s2 >s1)
2078                            {
2079                              fprintf ( stdout, "NB %d %d %c %d %c ", col2, pos[s1][col2], A->seq_al[s1][col2], pos[s2][col2], A->seq_al[s2][col2]);
2080                              BA->seq_al[s1][col2]=toupper (BA->seq_al[s1][col2]);
2081                              BA->seq_al[s2][col2]=toupper (BA->seq_al[s2][col2]);
2082                            }
2083                          delta=FABS((d1-d2));              
2084                          if (delta<PP->md_threshold)
2085                            distances[s1][s2][real_res1_col1][2]++;
2086                          distances[s1][s2][real_res1_col1][1]+=delta;
2087                          distances[s1][s2][real_res1_col1][0]++;
2088                          distances[s1][s2][real_res1_col1][3]+=delta*delta;
2089                          nrapdb++;
2090                          rapdb+=delta*delta;
2091                        }
2092                    }
2093
2094                  if ( nrapdb==0)distances[s1][s2][real_res1_col1][4]=-1;
2095                  else distances[s1][s2][real_res1_col1][4]=(float)sqrt((double)(rapdb/nrapdb));
2096
2097                  if ( print_rapdb && s2>s1)
2098                    {
2099                      if (nrapdb==0)
2100                        {
2101                          fprintf ( stdout, "APDB: UNDEFINED\n");
2102                        }
2103                      else 
2104                        {
2105                          
2106                          fprintf ( stdout, " APDB: %.2f ",(float)sqrt((double)(rapdb/nrapdb)));
2107                          BA->residue_case=KEEP_CASE;unalign_residues (BA, s1, s2);
2108                          fprintf ( stdout,"SEQ1: %s %s SEQ2: %s %s\n", BA->name[s1], BA->seq_al[s1], BA->name[s2], BA->seq_al[s2]);
2109                        }
2110                    }
2111                  
2112                }
2113          
2114          free_aln (BA);
2115          free_int (pos, -1);
2116          return distances;
2117      }
2118
2119
2120
2121 Alignment * msa2struc_dist ( Alignment *A, Alignment *ST, char *results)
2122      {
2123
2124        int **pos, c;
2125        FILE *tl;
2126          int s1, s2, rs1, rs2;
2127          int col1, col2;
2128          float ****distances;
2129          float **dm;
2130          int **count;
2131          int   **dm_int;
2132          float min, max;
2133          
2134               /*Distances[Nseq][len_aln][4]
2135                 distances...[0]: Number of residues within the bubble
2136                 distances...[1]: Absolute difference of distance of residues within Bubble
2137                 distances...[2]: Number of residues within the bubble with Delta dist < md_threshold
2138                 distances ..[3]: Sum of squared difference of distances
2139                 distances ..[4]: iRMSD
2140               */
2141          Pdb_param *pdb_param;
2142          Constraint_list *CL;
2143          int a, b, ncol;
2144          float d1, d2,delta;
2145          int wd1, wd2;
2146          int in_bubble=0;
2147          int real_res1_col1=0;
2148          int real_res1_col2;
2149          int real_res2_col1;
2150          int real_res2_col2;
2151          Pdb_param *PP;
2152          int print_rapdb;
2153          float nrapdb, rapdb;
2154          Alignment *BA=NULL;
2155          NT_node *T0,*T1,*T2,*PT, *POS;
2156          NT_node BT0, BT10,BT50, BT100,RBT;
2157          char **pair_pos_list;
2158          
2159          int ntree=0, ntree2;
2160          
2161          Alignment *B;
2162          char *pos_list;
2163          char *tot_pos_list;
2164          char *struc_tree10;
2165          char *struc_tree100;
2166          char *struc_tree50;
2167          char *struc_tree0;
2168          
2169          char *color_struc_tree;
2170          int **score;
2171          int proceed=1;
2172          
2173
2174          
2175          declare_name(tot_pos_list);
2176          sprintf ( tot_pos_list, "%s.tot_pos_list", results);
2177          
2178          declare_name(pos_list);
2179          sprintf ( pos_list, "%s.pos_list", results);
2180
2181          declare_name(struc_tree0);
2182          sprintf ( struc_tree0, "%s.struc_tree_full",results);
2183
2184          declare_name(struc_tree10);
2185          sprintf ( struc_tree10, "%s.struc_tree10",results);
2186
2187          declare_name(struc_tree100);
2188          sprintf ( struc_tree100, "%s.struc_tree100",results);
2189
2190          declare_name(struc_tree50);
2191          sprintf ( struc_tree50, "%s.struc_tree50",results);     
2192          
2193          declare_name(color_struc_tree);
2194          sprintf ( color_struc_tree, "%s.struc_tree.html", results);
2195          
2196          pair_pos_list=declare_char (A->len_aln*A->len_aln+1, 100);  
2197          T1=vcalloc (A->len_aln*A->len_aln+1, sizeof (NT_node));
2198          T2=vcalloc (A->len_aln+1, sizeof (NT_node));
2199          
2200          PT=vcalloc (A->len_aln*A->len_aln+1, sizeof (NT_node));
2201          POS=vcalloc (A->len_aln+1, sizeof (NT_node));
2202          
2203          CL=A->CL;
2204
2205          //Check all sequences have a PDB structure
2206          for (a=0; a<A->nseq; a++)
2207            {
2208              if ( ! seq2P_template_file(A->S,a))
2209                {
2210                  fprintf ( stderr, "\n--- ERROR: %s has no structural template. All sequence in the MSA must have a known structure [FATAL]\n", (A->name[a]));
2211                  proceed=0;
2212                }
2213            }
2214          if (!proceed)
2215             printf_exit (EXIT_FAILURE, stderr, "\n\n---- ERROR: All provided sequences must have a valid PDB identifier [FATAL:tRMSD-%s]\n\n", PROGRAM);
2216          
2217          for ( s1=0; s1< (A->S)->nseq; s1++)
2218            if ( CL->T[s1]){PP=(CL->T[s1])->pdb_param;break;}
2219                  
2220          for ( s1=0; s1< A->nseq; s1++)
2221            {
2222              rs1=A->order[s1][0];
2223              if (CL->T[rs1] &&  !(CL->T[rs1])->ca_dist)(CL->T[rs1])->ca_dist=measure_ca_distances(CL->T[rs1]);
2224            }     
2225          pos=aln2pos_simple (A, A->nseq);
2226          dm=declare_float (A->nseq, A->nseq);
2227          dm_int=declare_int (A->nseq, A->nseq);
2228          count=declare_int (A->nseq, A->nseq);
2229          PP->maximum_distance=10000;
2230          
2231          tl=vfopen (tot_pos_list, "w");
2232          for (ncol=0,ntree=0, col1=0; col1< A->len_aln; col1++)
2233            {
2234              int tree, cont;
2235              output_completion (stderr, col1, A->len_aln,1, "Sample Columns");
2236              for (cont=1,s1=0; s1<A->nseq; s1++)if ( is_gap (A->seq_al[s1][col1]))cont=0;//Stop if gap in column 1
2237              
2238              if ( cont==0)continue;
2239              
2240              for (s1=0; s1<A->nseq; s1++)for ( s2=0; s2<A->nseq; s2++){count[s1][s2]=0;dm[s1][s2]=0;}
2241              
2242              for ( ntree2=0,col2=0; col2<A->len_aln; col2++)
2243                {
2244                  for (s1=0; s1< A->nseq-1; s1++)
2245                    {
2246                      for ( s2=s1+1; s2<A->nseq; s2++)
2247                        {
2248                          rs1=A->order[s1][0];
2249                          rs2=A->order[s2][0];
2250                          cont=1;
2251                          
2252                          if ( s1==s2){dm[s1][s2]=0;continue;}
2253                          else if (!(CL->T[rs1]) || !(CL->T[rs2])){cont=0;}
2254                          else if ( islower (A->seq_al[s1][col1]) || islower ( A->seq_al[s2][col1])){cont=0;}
2255                          else if ( pos[s1][col1]<=0 || pos[s2][col1]<=0 ){cont=0;}
2256                          if (pos[s1][col2]<=0 || pos[s2][col2]<=0 ){cont=0;}//stop if Gap in Column 2
2257                          else if ( FABS((pos[s1][col2]-pos[s1][col1]))<=PP->n_excluded_nb){cont=0;}
2258                          else if ( FABS((pos[s2][col2]-pos[s2][col1]))<=PP->n_excluded_nb){cont=0;}
2259                          else if ( islower (A->seq_al[s1][col2]) || islower ( A->seq_al[s2][col2])){cont=0;}
2260                          if ( cont==0){continue;}
2261                           
2262                          
2263                          real_res1_col1=pos[s1][col1]-1;
2264                          real_res1_col2=pos[s1][col2]-1;
2265                          
2266                          real_res2_col1=pos[s2][col1]-1;
2267                          real_res2_col2=pos[s2][col2]-1;
2268                          
2269                          d1=(CL->T[rs1])->ca_dist[real_res1_col1][real_res1_col2];
2270                          d2=(CL->T[rs2])->ca_dist[real_res2_col1][real_res2_col2];
2271                 
2272                          if ( d1==UNDEFINED || d2 == UNDEFINED) continue;
2273                          
2274                          if ( strm ( PP->local_mode, "sphere"))
2275                            {
2276                              in_bubble= (d1<PP->maximum_distance && d2<PP->maximum_distance)?1:0;                  ;               
2277                            }
2278                          else if ( strm ( PP->local_mode, "window"))
2279                            {
2280                              wd1=FABS((pos[s1][col2]-pos[s1][col1]));
2281                              wd2=FABS((pos[s2][col2]-pos[s2][col1]));
2282                              in_bubble= (wd1<PP->maximum_distance && wd2<PP->maximum_distance)?1:0;                ;               
2283                            } 
2284                          if (in_bubble)
2285                            {
2286                              delta=FABS((d1-d2));
2287                              //delta=delta*delta;
2288                              dm[s1][s2]=dm[s2][s1]+=delta;
2289                              count[s1][s2]++;
2290                              count[s2][s1]++;
2291                            }
2292                        }
2293                    }
2294                }
2295            
2296              
2297              min=max=-1;
2298              for (tree=1,s1=0; s1<A->nseq-1; s1++)
2299                for (s2=s1+1; s2<A->nseq; s2++)
2300                  {
2301                    if ( count [s1][s2])dm[s1][s2]=dm[s2][s1]=dm[s1][s2]/(float)count[s1][s2];
2302                    else 
2303                      {
2304                        tree=0;
2305                      }
2306                    if (s1==0 && s2==1)min=max=dm[s1][s2];
2307                    min=MIN(dm[s1][s2], min);
2308                    max=MAX(dm[s1][s2], max);
2309                  }
2310              if (!tree || min==-1)continue;
2311              for (s1=0; s1<A->nseq-1; s1++)
2312                for (s2=s1+1; s2<A->nseq; s2++)
2313                  {
2314                    dm_int[s1][s2]=dm_int[s2][s1]=((dm[s1][s2])/(max))*100;
2315                  }
2316              POS[col1]=T1[ntree]=compute_std_tree_2 ( A, dm_int, "_TMODE_upgma");
2317              fprintf (tl, "\n>Tree_%d Column\n", col1+1);
2318              print_tree (T1[ntree], "newick", tl);
2319              ntree++;
2320            }
2321          
2322          vfclose (tl);
2323          if (!ntree)
2324            {
2325               fprintf ( stderr, "\nERROR: No suitable pair of column supporting a tree [FATAL]\n", (A->name[a]));
2326               exit (EXIT_SUCCESS);
2327            }
2328          
2329          score=treelist2avg_treecmp (T1, NULL);
2330          
2331          display_output_filename( stdout,"TreeList","newick",tot_pos_list, CHECK);
2332          
2333          if ((BT10=treelist2filtered_bootstrap (T1, NULL,score, 0.1)))
2334            {
2335              vfclose (print_tree (BT10,"newick", vfopen (struc_tree10, "w")));
2336              display_output_filename( stdout,"Tree","newick",struc_tree10, CHECK);
2337            }
2338          
2339          if ((BT50=treelist2filtered_bootstrap (T1, NULL, score,0.5)))
2340            {
2341              vfclose (print_tree (BT50,"newick", vfopen (struc_tree50, "w")));
2342              display_output_filename( stdout,"Tree","newick",struc_tree50, CHECK);
2343            }
2344          
2345          if ((BT100=treelist2filtered_bootstrap (T1, NULL,score, 1.0)))
2346            {
2347              vfclose (print_tree (BT100,"newick", vfopen (struc_tree100, "w")));
2348              display_output_filename( stdout,"Tree","newick",struc_tree100, CHECK);
2349            }
2350          
2351          
2352          RBT=BT100;
2353          if (RBT)
2354            {
2355              B=copy_aln (A, NULL);
2356              for (a=0; a<A->len_aln; a++)
2357                {
2358                  int score;
2359                  Tree_sim *S;
2360                  
2361                  if (POS[a])
2362                    {
2363                      S=tree_cmp (POS[a], RBT);
2364                      score=S->uw/10;
2365                      vfree (S);
2366                    }
2367                  else
2368                    {
2369                      score=NO_COLOR_RESIDUE;
2370                    }
2371                  
2372                  for (b=0; b<B->nseq; b++)
2373                    {
2374                      if ( is_gap (B->seq_al[b][a]) || score == NO_COLOR_RESIDUE)
2375                        {
2376                          B->seq_al[b][a]=NO_COLOR_RESIDUE;
2377                        }
2378                      else
2379                        {
2380                          B->seq_al[b][a]=S->uw/10;
2381                        }
2382                    }
2383                }
2384              
2385              output_format_aln ("score_html", A,B,color_struc_tree);
2386              display_output_filename( stdout,"Colored MSA","score_html",color_struc_tree, CHECK);
2387              free_aln (BA);
2388            }
2389          free_int (pos, -1);
2390          exit (EXIT_SUCCESS);
2391          return NULL;
2392      }
2393
2394 float square_atom ( Atom *X)
2395 {
2396   
2397   return X->x*X->x + X->y*X->y + X->z*X->z;
2398 }
2399 Atom* reframe_atom ( Atom *X, Atom*Y, Atom *Z, Atom *IN, Atom *R)
2400      {
2401        float new_x, new_y, new_z;
2402        
2403        if ( R==NULL)R=vcalloc ( 1, sizeof (Atom));
2404
2405        
2406         new_x= X->x*IN->x + Y->x*IN->y +Z->x*IN->z;
2407         new_y= X->y*IN->x + Y->y*IN->y +Z->y*IN->z;
2408         new_z= X->z*IN->x + Y->z*IN->y +Z->z*IN->z;
2409
2410         R->x=new_x;
2411         R->y=new_y;
2412         R->z=new_z;
2413        return R;
2414      }
2415
2416 Atom* add_atom ( Atom *A, Atom*B, Atom *R)
2417 {
2418   if ( R==NULL)R=vcalloc ( 1, sizeof (Atom));
2419   
2420   R->x=A->x+B->x;
2421   R->y=A->y+B->y;
2422   R->z=A->z+B->z;
2423   
2424   return R;
2425 }
2426 Atom* diff_atom ( Atom *A, Atom*B, Atom *R)
2427 {
2428   if ( R==NULL)R=vcalloc ( 1, sizeof (Atom));
2429   
2430   R->x=A->x-B->x;
2431   R->y=A->y-B->y;
2432   R->z=A->z-B->z;
2433   
2434   return R;
2435 }
2436
2437 Atom * copy_atom ( Atom *A, Atom*R)
2438 {
2439   if ( R==NULL)R=vcalloc ( 1, sizeof (Atom));
2440   R->num=A->num;
2441   R->res_num=A->res_num;
2442   R->x=A->x;
2443   R->y=A->y;
2444   R->z=A->z;
2445   
2446   sprintf( R->type, "%s", A->type);
2447   return R;
2448 }
2449  void print_atom (Atom *A)
2450 {
2451   fprintf ( stdout, "%.2f %.2f %.2f", A->x, A->y, A->z);
2452 }
2453 /************************************************************************/
2454 /*                                                                      */
2455 /*                            NUSSINOV                                  */
2456 /*                                                                      */
2457 /************************************************************************/
2458
2459 /*---------prototypes ----------*/
2460 static void computeBasePairMatrix(int**M,char*S,int l, int T);
2461 static int backtrack(int a,int b,int**M,char*S,char*P, int T);
2462
2463
2464
2465 static int basePair(char x, char y)
2466 {
2467   static short **mat;
2468
2469   if (!mat)
2470     {
2471       char alp[20];
2472       int a, b, c1, c2, lc1, lc2;
2473       mat=declare_short (256, 256);
2474       sprintf ( alp, "AGCTUagctu");
2475       for (a=0; a<strlen (alp); a++)
2476         {
2477           for (b=a; b<strlen (alp)-1; b++)
2478             {
2479               c1=alp[a];c2=alp[b];
2480               lc1=tolower(c1); lc2=tolower(c2);
2481               if ( lc1=='g' && lc2=='c')
2482                 mat[c1][c2]=1;
2483               else if ( lc1=='a' && lc2=='u')
2484                 mat[c1][c2]=1;
2485               else if ( lc1=='u' && lc2=='g')
2486                 mat [c1][c2]=1;
2487               mat[c2][c1]=mat[c1][c2];
2488             }
2489         }
2490     }
2491   return (int)mat[(int)x][(int)y];
2492 }
2493   
2494   
2495
2496 /* ------------------------------------------------------------ */
2497
2498 char *nussinov(char *S, int THRESHOLD)
2499 {
2500   char *paren; 
2501   int i;
2502
2503   /*-------------------------------
2504     S is RNA sequence
2505     paren is parenthesis expression for 
2506     optimal RNA secondary structure
2507     THRESHOLD: Min distance between two paired residues
2508     -------------------------------*/
2509
2510   int **numBasePairs;
2511    int n;
2512    
2513    /*----- initialization  --*/ 
2514    n = strlen(S);
2515    paren=vcalloc (n+1, sizeof (char));
2516    numBasePairs=declare_int (n,n);
2517    
2518    for (i=0;i<n;i++) paren[i]='.';
2519    paren[n]='\0'; // paren is string of same length as S
2520    computeBasePairMatrix(numBasePairs,S,n, THRESHOLD);
2521    backtrack(0,n-1,numBasePairs,S,paren, THRESHOLD);
2522    free_int (numBasePairs, -1);
2523    return paren;
2524 }
2525
2526 static void computeBasePairMatrix(int** numBasePairs,char *S,int n, int THRESHOLD)
2527 {   
2528    int i,j,d,k,max,val,index;
2529
2530    for (d = THRESHOLD; d < n; d++){
2531      for(i=0; i < n; i++)
2532        {
2533         j=i+d;
2534         if (j < n){ 
2535           max=0;
2536           index=n; 
2537            /*-------------------------------------
2538            if index<n at end of for-loop, then this
2539            means that index and j form a base pair,
2540            and this is noted by numBasePairs[j][i]=index.
2541            if index=n at end of for-loop, then this
2542            means that j is not base paired.
2543            -------------------------------------*/
2544
2545           if ( numBasePairs[i][j-1]>max ){
2546              max = numBasePairs[i][j-1];
2547              index = n;
2548              // j not basepaired with some k such that i<k<j
2549           }
2550
2551           val = basePair(S[i],S[j]) + numBasePairs[i+1][j-1]; 
2552           if ( j-i<= THRESHOLD && val > max ){
2553              max = val;
2554              index=i;
2555           }
2556           for(k=i; k<=j-THRESHOLD; k++){ 
2557              val = basePair(S[k],S[j]) + numBasePairs[i][k-1] 
2558                    + numBasePairs[k+1][j-1];
2559              if (val > max) {
2560                 max = val;
2561                 index=k;
2562              }
2563           }
2564           numBasePairs[i][j]=max;
2565           if (index<n) 
2566              numBasePairs[j][i]=index;
2567           else
2568              numBasePairs[j][i]=-1;
2569        } 
2570      } 
2571    } 
2572
2573 }
2574
2575
2576   
2577
2578 static int backtrack(int i, int j, int **numBasePairs,char *S, char *paren, int THRESHOLD)
2579 {
2580   int k;
2581
2582    k = numBasePairs[j][i];
2583    if(k != -1)  
2584      {
2585      paren[k] = '('; 
2586      paren[j] = ')';
2587      if( THRESHOLD <= (j-1)-(k+1) )
2588        backtrack(k+1,j-1,numBasePairs,S,paren, THRESHOLD);
2589      if (THRESHOLD <= k-1-i  )
2590        backtrack(i,k-1,numBasePairs,S,paren, THRESHOLD);
2591      } 
2592    else{ 
2593      if( THRESHOLD <= j-1-i )
2594        {
2595          backtrack(i,j-1,numBasePairs,S,paren, THRESHOLD);
2596        }
2597      else 
2598        return 0;
2599    }  
2600    return 0;}   
2601
2602 int count;
2603 char * rna_struc2rna_lib ( char *seq_name, char *seq, char *name)
2604 {
2605   FILE *fp;
2606   char *st;
2607
2608   
2609   st=nussinov (seq, 2);
2610   if ( name==NULL)name=vtmpnam(NULL);
2611   fp=vfopen ( name, "w");
2612   fprintf (fp, "! TC_LIB_FORMAT_01\n");
2613   fprintf (fp, "1\n%s %d %s\n", seq_name, strlen (seq), seq);
2614   fprintf (fp, "#1 1\n");
2615   display_rna_ss (0, seq, st, fp);
2616   fprintf ( fp, "! SEQ_1_TO_N\n");
2617   vfclose (fp);
2618   vfree (st);
2619   //printf_system ( "cp %s test", name);
2620   return name;
2621 }
2622 int display_rna_ss ( int n, char *seq, char *st, FILE *fp)
2623 {
2624   char p;
2625   char string[100];
2626   static int thread;
2627   
2628   while ((p=st[n])!='\0')
2629     {
2630       if ( p=='(')
2631         {
2632           thread=count++;
2633           sprintf (string, "%d",n+1);
2634           n=display_rna_ss (n+1, seq, st, fp);
2635           fprintf (fp, "%s %d 100\n", string, n+1);
2636         }
2637       else if (p=='.');
2638       else if (p==')')
2639         {
2640           return n;
2641         }
2642       n++;
2643     }
2644   return n;
2645 }
2646 /*********************************COPYRIGHT NOTICE**********************************/
2647 /*© Centro de Regulacio Genomica */
2648 /*and */
2649 /*Cedric Notredame */
2650 /*Tue Oct 27 10:12:26 WEST 2009. */
2651 /*All rights reserved.*/
2652 /*This file is part of T-COFFEE.*/
2653 /**/
2654 /*    T-COFFEE is free software; you can redistribute it and/or modify*/
2655 /*    it under the terms of the GNU General Public License as published by*/
2656 /*    the Free Software Foundation; either version 2 of the License, or*/
2657 /*    (at your option) any later version.*/
2658 /**/
2659 /*    T-COFFEE is distributed in the hope that it will be useful,*/
2660 /*    but WITHOUT ANY WARRANTY; without even the implied warranty of*/
2661 /*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the*/
2662 /*    GNU General Public License for more details.*/
2663 /**/
2664 /*    You should have received a copy of the GNU General Public License*/
2665 /*    along with Foobar; if not, write to the Free Software*/
2666 /*    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA*/
2667 /*...............................................                                                                                      |*/
2668 /*  If you need some more information*/
2669 /*  cedric.notredame@europe.com*/
2670 /*...............................................                                                                                                                                     |*/
2671 /**/
2672 /**/
2673 /*      */
2674 /*********************************COPYRIGHT NOTICE**********************************/