8 #include "io_lib_header.h"
9 #include "util_lib_header.h"
10 #include "dp_lib_header.h"
11 #include "define_header.h"
14 int aln_compare ( int argc, char *argv[])
19 Sequence *SA, *SB, *TOT_SEQ=NULL;
20 Sequence *defined_residueA;
21 Sequence *defined_residueB;
35 char compare_mode[STRING];
37 char *alignment1_file;
38 char *alignment2_file;
43 char io_format[STRING];
54 int output_aln_threshold;
55 char output_aln_file[LONG_STRING];
56 char output_aln_format[LONG_STRING];
57 char output_aln_modif[LONG_STRING];
59 Constraint_list *CL_A;
60 Constraint_list *CL_B;
61 CLIST_TYPE *clist_entry;
66 /*Column Comparison Variables*/
84 int *n_sub_categories;
89 int **aln_output_count;
95 char sim_matrix[STRING];
99 char *sim_category_list;
100 int *sim_n_sub_categories;
103 if ( argc==1|| strm6 ( argv[1], "h", "-h", "help", "-help", "-man", "?"))
105 output_informations();
108 argv=standard_initialisation (argv, &argc);
109 /*Declarations and Initializations*/
110 alignment1_file=vcalloc ( LONG_STRING, sizeof (char));
111 alignment2_file=vcalloc ( LONG_STRING, sizeof (char));
113 pep1_file=vcalloc ( LONG_STRING, sizeof (char));
114 pep2_file=vcalloc ( LONG_STRING, sizeof (char));
117 sprintf (compare_mode, "sp");
123 grep_list=vcalloc ( STRING, sizeof (char**));
124 for ( a=0; a< STRING; a++)grep_list[a]=declare_char (3, STRING);
128 struct_file=declare_char ( MAX_N_STRUC, LONG_STRING);
129 struct_format=declare_char (MAX_N_STRUC, STRING);
131 n_symbol=vcalloc ( MAX_N_STRUC, sizeof (int));
132 symbol_list=vcalloc (MAX_N_STRUC, sizeof (char**));
133 for ( a=0; a< MAX_N_STRUC; a++)symbol_list[a]=declare_char ( 100, 100);
139 category=vcalloc ( MAX_N_CATEGORIES, sizeof (char**));
140 for ( a=0; a< MAX_N_CATEGORIES; a++)category[a]=declare_char(100, STRING);
141 n_sub_categories=vcalloc ( 100, sizeof (int));
142 category_list=vcalloc ( LONG_STRING, sizeof (char));
143 sprintf ( category_list, "[*][*]=[ALL]");
147 sim_category=vcalloc ( MAX_N_CATEGORIES, sizeof (char**));
148 for ( a=0; a< MAX_N_CATEGORIES; a++)sim_category[a]=declare_char(100, STRING);
149 sim_n_sub_categories=vcalloc ( 100, sizeof (int));
150 sim_category_list=vcalloc ( LONG_STRING, sizeof (char));
151 sprintf ( sim_category_list, "[*][*]=[ALL]");
152 sprintf ( sim_aln, "al1");
154 sprintf ( sim_category_list, "[*][*]=[ALL]");
156 sprintf ( io_format, "ht");
157 sprintf ( io_file, "stdout");
162 output_aln_threshold=100;
163 sprintf ( output_aln_file, "stdout");
164 sprintf ( output_aln_format, "clustalw");
165 sprintf ( output_aln_modif, "lower");
166 /*END OF INITIALIZATION*/
173 for ( a=1; a< argc; a++)
175 if (strcmp ( argv[a], "-f")==0)
177 sprintf (io_file,"%s", argv[++a]);
179 else if ( strcmp ( argv[a], "-sim_aln")==0)
181 sprintf (sim_aln,"%s", argv[++a]);
183 else if ( strcmp ( argv[a], "-sim_matrix")==0)
185 sprintf (sim_matrix,"%s", argv[++a]);
187 else if ( strm ( argv[a], "-compare_mode"))
189 sprintf ( compare_mode, "%s", argv[++a]);
191 else if ( strcmp ( argv[a], "-sim_cat")==0)
193 if ( argv[++a][0]!='[')
195 if ( strcmp ( argv[a], "3d_ali")==0)sprintf ( sim_category_list, "[b][b]+[h][h]=[struc]");
198 fprintf ( stderr, "\n%s: Unknown category for distance measure", argv[a]);
204 sprintf ( sim_category_list, "%s", argv[a]);
207 else if ( strcmp ( argv[a], "-grep_value")==0)
209 sprintf ( grep_list[n_greps][0], "%s", argv[++a]);
210 sprintf ( grep_list[n_greps][1], "%s", argv[++a]);
214 else if ( strcmp ( argv[a], "-al1")==0)
217 sprintf ( alignment1_file, "%s", argv[++a]);
220 else if ( strcmp ( argv[a], "-al2")==0)
223 sprintf ( alignment2_file, "%s", argv[++a]);
226 else if ( strcmp ( argv[a], "-pep1")==0)
229 sprintf ( pep1_file, "%s", argv[++a]);
231 else if ( strcmp ( argv[a], "-pep2")==0)
234 sprintf ( pep2_file, "%s", argv[++a]);
236 else if ( strcmp ( argv[a], "-pep")==0)
240 else if ( strcmp ( argv[a], "-count")==0)
244 else if ( strcmp ( argv[a], "-output_aln")==0)
248 else if ( strcmp ( argv[a], "-output_aln_threshold")==0)
250 output_aln_threshold=atoi(argv[++a]);
252 else if ( strcmp ( argv[a], "-output_aln_file")==0)
254 sprintf (output_aln_file,"%s",argv[++a]);
256 else if ( strcmp ( argv[a], "-output_aln_format")==0)
258 sprintf (output_aln_format,"%s",argv[++a]);
260 else if ( strcmp ( argv[a], "-output_aln_modif")==0)
262 sprintf (output_aln_modif,"%s",argv[++a]);
264 else if ( strcmp ( argv[a], "-st")==0)
266 sprintf ( struct_file [n_structure], "%s", argv[++a]);
267 if (!NEXT_ARG_IS_FLAG && is_a_struc_format (argv[a+1]))
268 sprintf ( struct_format[n_structure], "%s", argv[++a]);
270 sprintf ( struct_format[n_structure], "%s", "pep");
272 if ( !NEXT_ARG_IS_FLAG && strcmp ( argv[a+1], "conv")==0)
275 while(!NEXT_ARG_IS_FLAG)
277 sprintf ( symbol_list[n_structure][n_symbol[n_structure]], "%s", argv[++a]);
278 n_symbol[n_structure]++;
281 else if (!NEXT_ARG_IS_FLAG)
283 symbol_list[n_structure]=make_symbols ( argv[++a], &n_symbol[n_structure]);
288 symbol_list[n_structure]=make_symbols ( "any", &n_symbol[n_structure]);
294 else if ( strcmp (argv[a], "-sep")==0)
296 if ( !NEXT_ARG_IS_FLAG)
297 get_separating_char ( argv[++a][0], &sep_l, &sep_r);
301 else if ( strncmp ( argv[a], "-io_format",5)==0)
303 sprintf ( io_format, "%s", argv[++a]);
305 else if ( strcmp ( argv[a], "-io_cat")==0)
307 if ( argv[++a][0]!='[')
309 if ( strcmp ( argv[a], "3d_ali")==0)sprintf ( category_list, "[b][b]+[h][h]=[struc];[*][*]=[tot]");
313 sprintf ( category_list, "%s", argv[a]);
318 fprintf ( stdout, "\nOPTION %s UNKNOWN[FATAL]\n", argv[a]);
319 myexit (EXIT_FAILURE);
323 /*PARAMETER PROCESSING*/
325 if ( pep_compare==1 || count==1)aln_compare=0;
326 if ( aln_compare==1)pep_compare=0;
328 /*READ THE TOTAL SEQUENCES*/
329 seq_list=declare_char ( 100,STRING);
332 if ( alignment1_file[0] && !check_file_exists ( alignment1_file))
334 fprintf (stderr, "\nERROR: %s DOES NOT EXIST[FATAL:%s]\n", alignment1_file, PROGRAM);
335 myexit(EXIT_FAILURE);
337 if ( alignment2_file[0] && !check_file_exists ( alignment2_file))
339 fprintf (stderr, "\nERROR: %s DOES NOT EXIST[FATAL:%s]\n", alignment2_file, PROGRAM);
340 myexit(EXIT_FAILURE);
342 if ( pep1_file[0] && !check_file_exists ( pep1_file))
344 fprintf (stderr, "\nERROR: %s DOES NOT EXIST[FATAL:%s]\n", pep1_file, PROGRAM);
345 myexit(EXIT_FAILURE);
347 if ( pep2_file[0] && !check_file_exists ( pep2_file))
349 fprintf (stderr, "\nERROR: %s DOES NOT EXIST[FATAL:%s]\n", pep2_file, PROGRAM);
350 myexit(EXIT_FAILURE);
353 if ( alignment1_file[0])sprintf ( seq_list[n_seq_file++], "A%s", alignment1_file);
354 if ( alignment2_file[0])sprintf ( seq_list[n_seq_file++], "A%s", alignment2_file);
355 if ( pep1_file[0])sprintf ( seq_list[n_seq_file++], "S%s", pep1_file);
356 if ( pep2_file[0])sprintf ( seq_list[n_seq_file++], "S%s", pep2_file);
360 TOT_SEQ=read_seq_in_n_list ( seq_list, n_seq_file, NULL, NULL);
362 A=declare_aln (TOT_SEQ);
363 B=declare_aln (TOT_SEQ);
366 /*1 COMPARISON OF THE SEQUENCES*/
367 if ( pep_compare==1 || count==1)
371 if ( pep1_file[0]!='\0')SA=main_read_seq (pep1_file);
372 else if (alignment1_file[0]!='\0')
374 main_read_aln ( alignment1_file, A);
379 main_read_aln ("stdin", A);
380 sprintf ( alignment1_file, "stdin");
383 if ( pep2_file[0]!='\0')SB=main_read_seq (pep2_file);
384 else if (alignment2_file[0]!='\0')
386 main_read_aln ( alignment2_file, B);
393 sprintf ( alignment2_file, "%s", alignment1_file );
395 buf1=(pep1_file[0]!='\0')?pep1_file: alignment1_file;
396 buf2=(pep2_file[0]!='\0')?pep2_file: alignment2_file;
397 /*Output of the Results*/
399 fp=vfopen ( io_file, "w");
404 fprintf (fp, "Number of seq: %d %d\n", SA->nseq,SA->nseq);
405 for ( a=0; a< SA->nseq; a++) fprintf (fp, "%-15s %d %d\n", SA->name[a], (int)strlen (SA->seq[a]), (int)strlen (SA->seq[a]));
408 if (SA->nseq!=SB->nseq)
411 fprintf ( fp, "DIFFERENCE TYPE 1: Different number of sequences %3d/%3d",SA->nseq,SB->nseq);
416 for ( a=0; a< SA->nseq; a++)
418 lower_string (SA->seq[a]);
419 lower_string (SB->seq[a]);
423 if ( strcmp ( SA->seq[a], SB->seq[a])!=0)
425 fprintf ( fp, "DIFFERENCE TYPE 2: %s is different in the 2 files\n", SA->name[a]);
429 for ( a=0; a< SA->nseq; a++)
431 lower_string (SA->seq[a]);
432 lower_string (SB->seq[a]);
436 if ( strlen ( SA->seq[a])!= strlen (SB->seq[a]))
438 fprintf ( fp, "DIFFERENCE TYPE 3: %s has != length in the 2 files (%d-%d)\n", SA->name[a],(int)strlen ( SA->seq[a]), (int)strlen (SB->seq[a]));
444 fprintf ( fp, "\nDIFFERENCES found between:\n\t%s\n\t%s\n**********\n\n",buf1, buf2);
449 /*2 COMPARISON OF THE ALIGNMENTS*/
450 else if ( aln_compare==1)
453 n_categories=parse_category_list ( category_list, category, n_sub_categories);
454 sim_n_categories=parse_category_list ( sim_category_list, sim_category, sim_n_sub_categories);
456 main_read_aln ( alignment1_file, A);
457 main_read_aln ( alignment2_file, B);
458 CLS=trim_aln_seq ( A, B);
461 defined_residueA=get_defined_residues (A);
462 defined_residueB=get_defined_residues (B);
465 A=thread_defined_residues_on_aln(A, defined_residueA);
466 A=thread_defined_residues_on_aln(A, defined_residueB);
467 B=thread_defined_residues_on_aln(B, defined_residueA);
468 B=thread_defined_residues_on_aln(B, defined_residueB);
471 CL_A=declare_constraint_list ( CLS, NULL, NULL, 0, NULL, NULL);
472 CL_B=declare_constraint_list ( CLS, NULL, NULL, 0, NULL, NULL);
475 CL_A=aln2constraint_list (A,CL_A, "sim");
476 CL_B=aln2constraint_list (B,CL_B, "sim");
478 clist_entry=vcalloc ( CL_A->entry_len, CL_A->el_size);
480 glob=vcalloc ( A->nseq+1, sizeof (int));
481 pw_glob=declare_int ( A->nseq+1, A->nseq+1);
484 if ( strm( compare_mode, "sp"))
486 for ( b=0,a=0; a<CL_A->ne; a++)
488 s1=vread_clist(CL_A, a, SEQ1);
489 s2=vread_clist(CL_A, a, SEQ2);
490 clist_entry=extract_entry ( clist_entry, a, CL_A);
498 clist_entry=extract_entry ( clist_entry, a, CL_A);
500 if ((main_search_in_list_constraint (clist_entry,&pos_in_clist,4,CL_B))!=NULL)
502 vwrite_clist ( CL_A, a, MISC, 1);
507 vwrite_clist ( CL_A, a, MISC, 0);
511 else if ( strm( compare_mode, "column"))
513 posA=aln2pos_simple_2(A);
514 posB=aln2pos_simple_2(B);
515 seq_cache=declare_int ( A->nseq, A->len_aln+1);
516 for ( n=0,a=0; a< A->len_aln; a++)
517 for ( b=0; b<B->len_aln; b++)
519 is_same=compare_pos_column(posA, a, posB, b, A->nseq);
524 for (c=0; c< A->nseq;c++)if ( posA[c][a]>0)seq_cache[c][posA[c][a]]=1;
528 for ( a=0,b=0; a< CL_A->ne; a++)
530 s1=vread_clist(CL_A, a, SEQ1);
531 s2=vread_clist(CL_A, a, SEQ2);
537 r1=vread_clist(CL_A, a, R1);
538 if (seq_cache[s1][r1]){b++;vwrite_clist ( CL_A, a, MISC, 1);}
542 free_int (seq_cache, -1);
546 for ( a=0; a< n_structure; a++)
548 ST=read_structure (struct_file[a],struct_format[a], A,B,ST,n_symbol[a], symbol_list[a]);
551 /*RESULT ARRAY DECLARATION*/
553 tot_count=declare_int (n_categories+1, A->nseq+1);
554 pos_count=declare_int (n_categories+1, A->nseq+1);
555 pw_tot_count=vcalloc ( A->nseq, sizeof (int**));
556 for ( a=0; a< A->nseq; a++)pw_tot_count[a]=declare_int ( A->nseq, n_categories);
559 pw_pos_count=vcalloc ( A->nseq, sizeof (int**));
560 for ( a=0; a< A->nseq; a++)pw_pos_count[a]=declare_int ( A->nseq, n_categories);
562 /*COMPARISON MODULE*/
563 for ( a=0; a< n_categories; a++)
565 for (b=0; b<CL_A->ne; b++)
567 s1=vread_clist(CL_A, b, SEQ1);
568 s2=vread_clist(CL_A, b, SEQ2);
570 r1=vread_clist(CL_A, b, R1);
571 r2=vread_clist(CL_A, b, R2);
573 c=vread_clist(CL_A, b, MISC);
575 if ( is_in_struct_category ( s1, s2, r1, r2, ST, category[a], n_sub_categories[a]))
579 tot_count[a][s1+1]++;
580 tot_count[a][s2+1]++;
581 pw_tot_count[s1][s2][a]++;
582 pw_tot_count[s2][s1][a]++;
585 pw_pos_count[s1][s2][a]++;
586 pw_pos_count[s2][s1][a]++;
588 pos_count[a][s1+1]++;
589 pos_count[a][s2+1]++;
600 /*Measure of Aligned Sequences Similarity*/
602 sim=get_aln_compare_sim ((strcmp (sim_aln, "al1")==0)?A:B, ST,sim_category[0], sim_n_sub_categories[0], sim_matrix);
603 sim_param=analyse_sim ((strcmp (sim_aln, "al1")==0)?A:B, sim);
606 /*Fill the Result_structure*/
607 R=vcalloc ( 1, sizeof (Result));
609 R->grep_list=grep_list;
617 R->alignment1_file=alignment1_file;
618 R->alignment2_file=alignment2_file;
619 R->io_format=io_format;
620 R->n_structure=n_structure;
621 R->struct_file=struct_file;
622 R->struct_format=struct_format;
623 R->n_symbol=n_symbol;
624 R->symbol_list=symbol_list;
627 R->tot_count=tot_count;
628 R->pos_count=pos_count;
629 R->pw_tot_count=pw_tot_count;
630 R->pw_pos_count=pw_pos_count;
633 R->n_categories=n_categories;
634 R->category=category;
635 R->category_list=category_list;
636 R->n_sub_categories=n_sub_categories;
638 R->sim_param=sim_param;
639 R->sim_matrix=sim_matrix;
640 R->sim_n_categories=sim_n_categories;
641 R->sim_category=sim_category;
642 R->sim_category_list=sim_category_list;
643 R->sim_n_sub_categories=sim_n_sub_categories;
647 /*Output of the Results*/
649 fp=vfopen ( io_file, "w");
650 fp=output_format (io_format, fp, R);
654 /*Rewriting of Alignment A*/
658 aln_output_tot =declare_int ( A->nseq, A->len_aln+1);
659 aln_output_count=declare_int ( A->nseq, A->len_aln+1);
661 for ( a=0; a< CL_A->ne; a++)
663 clist_entry=extract_entry ( clist_entry, a, CL_A);
664 aln_output_tot[clist_entry[SEQ1]][clist_entry[R1]]++;
665 aln_output_tot[clist_entry[SEQ2]][clist_entry[R2]]++;
667 aln_output_count[clist_entry[SEQ1]][clist_entry[R1]]+=clist_entry[MISC];
668 aln_output_count[clist_entry[SEQ2]][clist_entry[R2]]+=clist_entry[MISC];
670 for ( a=0; a< A->nseq; a++)
673 for (c=0, b=0; b< A->len_aln; b++)
675 if ( !is_gap(A->seq_al[a][b]))
678 if ( aln_output_tot[a][c] && ((aln_output_count[a][c]*100)/aln_output_tot[a][c])<output_aln_threshold)
680 if ( strm (output_aln_modif, "lower"))
681 A->seq_al[a][b]=tolower(A->seq_al[a][b]);
683 A->seq_al[a][b]=output_aln_modif[0];
685 else A->seq_al[a][b]=toupper(A->seq_al[a][b]);
690 A->score_aln=(int)(R->tot_count[0][0]==0)?0:((R->pos_count[0][0]*100)/(float)R->tot_count[0][0]);
692 output_format_aln (output_aln_format,A,NULL,output_aln_file);
694 free_int ( aln_output_tot, -1);
695 free_int ( aln_output_count, -1 );
700 /************************************************************************************/
705 /************************************************************************************/
706 FILE *output_format (char *iof,FILE *fp, Result *R)
715 t: total results (global);
720 for ( a=0; a< l; a++)
722 if ( iof[a]=='H')fp=output_large_header (fp,R);
723 else if ( iof[a]=='h')fp=output_header (fp,R);
724 else if ( iof[a]=='t')fp=output_total_results (fp, R);
725 else if ( iof[a]=='s')fp=output_sequence_results (fp,R);
726 else if ( iof[a]=='p')fp=output_pair_wise_sequence_results(fp,R);
731 FILE *output_pair_wise_sequence_results (FILE *fp, Result *R)
736 for ( c=0; c<(R->A)->nseq-1; c++)
738 for ( d=c+1; d< (R->A)->nseq; d++)
740 fprintf (fp, "%-10s %-10s%s",(R->A)->name[c],(R->A)->name[d],SSPACE);
741 fprintf (fp, "%5.1f%s", R->sim[c][d], SSPACE);
743 for (a=0; a< R->n_categories; a++)
745 fprintf ( fp, "%5.1f ",(R->pw_tot_count[c][d][a]==0)?0:((float)(R->pw_pos_count[c][d][a]*100)/(float)R->pw_tot_count[c][d][a]));
746 fprintf ( fp, "%c%5.1f%c%s",(R->sep_l),(R->pw_glob[c][d]==0)?0:((float)(R->pw_tot_count[c][d][a]*100)/(float)R->pw_glob[c][d]),(R->sep_r),SSPACE);
748 fprintf ( fp, "%c%5d%c\n",(R->sep_l), R->pw_glob[c][d],(R->sep_r));
754 FILE *output_sequence_results (FILE *fp, Result *R)
758 for ( c=1; c<=R->A->nseq; c++)
760 fprintf (fp, "%-10s %-10s%s",(R->A)->name[c-1], "..",SSPACE);
761 fprintf (fp, "%5.1f%s", R->sim_param[c-1][0],SSPACE);
762 for (a=0; a< R->n_categories; a++)
764 fprintf ( fp, "%5.1f ",(R->tot_count[a][c]==0)?0:((float)(R->pos_count[a][c]*100)/(float)R->tot_count[a][c]));
765 fprintf ( fp, "%c%5.1f%c%s",(R->sep_l),(R->glob[c]==0)?0:((float)(R->tot_count[a][c]*100)/(float)R->glob[c]),(R->sep_r), SSPACE);
767 fprintf ( fp, "%c%5d%c\n",(R->sep_l), R->glob[c],(R->sep_r));
773 FILE *output_total_results (FILE *fp, Result *R)
778 fprintf ( fp, "%-13s %-7d%s",extract_suffixe (R->alignment1_file),(R->A)->nseq, SSPACE);
779 fprintf (fp, "%5.1f%s", R->sim_param[(R->A)->nseq][0], SSPACE);
780 for (a=0; a< R->n_categories; a++)
782 fprintf ( fp, "%5.1f ",(R->tot_count[a][0]==0)?0:((float)(R->pos_count[a][0]*100)/(float)R->tot_count[a][0]));
783 fprintf ( fp, "%c%5.1f%c%s",(R->sep_l),(R->glob[0]==0)?0:((float)(R->tot_count[a][0]*100)/(float)R->glob[0]),(R->sep_r), SSPACE);
785 fprintf ( fp, "%c%5d%c\n",(R->sep_l), R->glob[0],(R->sep_r));
788 FILE *output_header (FILE *fp, Result *R)
793 fprintf ( fp, "%s\n",generate_string ( R->n_categories*(13+strlen(SSPACE))+31+2*strlen(SSPACE),'*'));
795 fprintf ( fp, "%-10s %-10s %s%-3s%s", "seq1", "seq2",SSPACE,"Sim",SSPACE);
797 for ( a=0; a< R->n_categories; a++)
798 fprintf ( fp, "%-12s%s ",R->category[a][0], SSPACE);
799 fprintf (fp, "%-5s", "Tot");
803 FILE *output_large_header ( FILE *fp, Result *R)
807 fprintf ( fp, "AL1: %s\n", R->alignment1_file);
808 fprintf ( fp, "AL2: %s\n", R->alignment2_file);
809 for ( a=0; a< R->n_structure; a++)
811 fprintf (fp, "ST %d: %s [%s]/[", a, R->struct_file[a], R->struct_format[a]);
812 for ( b=0; b< R->n_symbol[a]; b++)fprintf (fp, "%s ", R->symbol_list[a][b]);
813 fprintf ( fp, "]\n");
818 void get_separating_char ( char s, char *l, char *r)
820 if ( s=='{' || s=='}')
826 else if ( s==']' || s=='[')
832 else if ( s==')' || s=='(')
846 /************************************************************************************/
851 /************************************************************************************/
852 float **get_aln_compare_sim ( Alignment *A, Structure *ST, char **cat, int n_cat, char *matrix)
862 sim=declare_float ( A->nseq, A->nseq);
864 for ( a=0; a<A->nseq-1; a++)
866 for (b=a+1; b< A->nseq; b++)
868 for ( r1=0, r2=0,tot=0, pos=0,c=0; c< A->len_aln; c++)
870 p1=is_gap(A->seq_al[a][c]);
871 p2=is_gap(A->seq_al[b][c]);
879 if (is_in_struct_category (a, b, r1, r2, ST, cat, n_cat))
882 pos+=is_in_same_group_aa ( cr1, cr2, 0, NULL, matrix);
886 sim[a][b]=sim[b][a]=(tot==0)?0:((pos*100)/tot);
891 float **analyse_sim ( Alignment *A, float **sim)
898 an=declare_float ( A->nseq+1,2);
900 for (d=0, a=0; a< A->nseq;a++)
902 for ( b=0; b< A->nseq; b++)
907 an[A->nseq][0]+=sim[a][b];
911 an[a][0]=((float)an[a][0]/(float)(A->nseq-1));
915 an[A->nseq][0]=an[A->nseq][0]/d;
917 for ( d=0,a=0; a< A->nseq; a++)
919 for ( b=0; b< A->nseq; b++)
923 c=an[a][0]-sim[a][b];
924 an[a][1]+=(c>0)?c:-c;
925 an[A->nseq][1]+=(c>0)?c:-c;
929 an[a][1]=((float)an[a][1]/(float)(A->nseq-1));
932 an[A->nseq][1]=an[A->nseq][1]/d;
944 /************************************************************************************/
949 /************************************************************************************/
950 int is_in_struct_category ( int s1, int s2, int r1, int r2, Structure *ST, char **cat, int n_cat)
953 static char *struc_r1;
954 static char *struc_r2;
962 if ( ST==NULL)return 1;
969 if ( r==NULL)r=declare_int (2, 2);
970 else r[0][0]=r[1][1]=r[1][0]=r[0][1]=0;
973 struc_r1=get_structure_residue ( s1, r1, ST);
974 struc_r2=get_structure_residue ( s2, r2, ST);
977 for ( a=1; a< n_cat; a+=2)
979 sprintf ( first, "%s", cat[a]);
980 sprintf ( second,"%s", cat[a+1]);
981 r[0][0]=struc_matches_pattern ( struc_r1, first);
982 r[0][1]=struc_matches_pattern ( struc_r2, first);
983 r[1][0]=struc_matches_pattern ( struc_r1, second);
984 r[1][1]=struc_matches_pattern ( struc_r2, second);
986 if ( (r[0][0]&&r[1][1])||(r[1][0]&&r[0][1]))return 1;
991 char * get_structure_residue (int seq, int res, Structure *ST)
996 s=vcalloc ( ST->n_fields+1, sizeof (char));
997 for (a=0; a< ST->n_fields; a++)
998 s[a]=ST->struc[seq][res][a];
1003 int struc_matches_pattern ( char *struc, char *pattern)
1011 sprintf ( p, "%s", pattern);
1013 if ( strcmp (p, "*")==0)return 1;
1019 for ( b=0; b<l; b++)
1022 if ( strcmp ( y, "*")==0);
1023 else if ( strchr(y, struc[b])==NULL)return 0;
1024 y=strtok ( NULL, ".");
1026 if ( y==NULL && b<(l-1))
1028 crash ( "\nA Structure is Missing[FATAL]");
1037 Structure * read_structure (char *fname, char *format, Alignment *A,Alignment *B, Structure *ST, int n_symbols, char **symbol_table)
1040 Alignment *StrucAln;
1041 Sequence *SEQ=NULL, *SA=NULL;
1046 if (ST==NULL)ST=declare_structure (A->nseq, A->seq_al);
1048 ST=extend_structure ( ST);
1050 StrucAln=declare_Alignment(NULL);
1051 if (strm ( format, "pep"))
1053 SEQ=main_read_seq ( fname);
1054 StrucAln=seq2aln(SEQ,StrucAln,0);
1057 else if ( strcmp ( format, "aln")==0)
1059 StrucAln=main_read_aln (fname, StrucAln);
1062 reorder_aln(StrucAln, A->name, A->nseq);
1064 SA=aln2seq(StrucAln);
1065 string_array_convert (SA->seq, SA->nseq, n_symbols, symbol_table);
1069 if (SEQ)free_sequence (SEQ, SEQ->nseq);
1074 int parse_category_list ( char *category_list_in, char ***category, int*n_sub_categories)
1077 char *category_list;
1079 category_list=vcalloc ( strlen(category_list_in)+1, sizeof (char));
1080 sprintf (category_list, "%s", category_list_in);
1084 while ((y=strtok(z, ";"))!=NULL)
1086 sprintf ( category[n++][2], "%s", y);
1091 for ( a=0; a< n; a++)
1093 sprintf (category_list,"%s",strtok(category[a][2], "="));
1094 sprintf (category[a][0],"%s",strtok (NULL, "="));
1095 sprintf ( category[a][++n_sub_categories[a]],"%s", strtok(category_list, "[]+"));
1096 while ( ((y=strtok(NULL, "[]+"))!=NULL))
1098 if ( strcmp (y, "#")==0)y=category[a][n_sub_categories[a]];
1099 sprintf ( category[a][++n_sub_categories[a]],"%s",y);
1106 int is_a_struc_format (char *format)
1108 if ( strcmp ( format, "pep")==0)return 1;
1109 if ( strcmp ( format, "aln")==0)return 1;
1112 /************************************************************************************/
1117 /************************************************************************************/
1118 void output_informations ()
1120 fprintf ( stderr, "\nPROGRAM: %s (%s)\n",PROGRAM,VERSION);
1121 fprintf ( stderr, "******INPUT***************************");
1122 fprintf ( stderr, "\n-al1 al1_file");
1123 fprintf ( stderr, "\n-al2 al2_file");
1124 fprintf ( stderr, "\n-compare_mode [sp] or column");
1125 fprintf ( stderr, "\n-pep (compare only the sequences");
1126 fprintf ( stderr, "\n-count");
1127 fprintf ( stderr, "\n-pep1 pep1_file");
1128 fprintf ( stderr, "\n-pep1 pep2_file");
1129 fprintf ( stderr, "\n-st str_file st_format conversion");
1130 fprintf ( stderr, "\n **st_format: aln, pep");
1131 fprintf ( stderr, "\n **conversion: 3d_ali, conv abcZ #X");
1132 fprintf ( stderr, "\nNOTE: Several structures in a row are possible");
1134 fprintf ( stderr, "\n\n****DISTANCE MEASURE*****************");
1135 fprintf ( stderr, "\n-sim_cat category_format or category_name");
1136 fprintf ( stderr, "\n **category_format: [*][*]=[tot]");
1137 fprintf ( stderr, "\n **category_name : 3d_ali (<=>[h][h]+[e][e]=[Struc]");
1138 fprintf ( stderr, "\n-sim_matrix matrix_name");
1139 fprintf ( stderr, "\n **matrix_name: idmat,pam250mt..");
1140 fprintf ( stderr, "\n-sim_aln al1 or al2");
1141 fprintf ( stderr, "\n\n****COMPARISON***********************");
1142 fprintf ( stderr, "\n-io_cat category_format or category_name");
1143 fprintf ( stderr, "\n **category_format: [*][*]=[tot]");
1144 fprintf ( stderr, "\n **category_name : 3d_ali(<=>[h][h]+[e][e]=[Struc];[*][*]=[Tot]");
1145 fprintf ( stderr, "\n\nNOTE: if two structures:");
1146 fprintf ( stderr, "\n[he.123][#]=[he123VShe123];[beh.*][he.2345]=[other]");
1147 fprintf ( stderr, "\n\n****OUTPUT****************************");
1148 fprintf ( stderr, "\n-f stdout");
1149 fprintf ( stderr, "\n stderr");
1150 fprintf ( stderr, "\n file_name");
1151 fprintf ( stderr, "\n-io_format hts");
1152 fprintf ( stderr, "\n H ->large Header");
1153 fprintf ( stderr, "\n h ->small Header");
1154 fprintf ( stderr, "\n t->global (average)results");
1155 fprintf ( stderr, "\n s ->average results for each sequence");
1156 fprintf ( stderr, "\n p ->results for each pair of sequences");
1157 fprintf ( stderr, "\n-output_aln Outputs al1 with conserved bits in Upper");
1158 fprintf ( stderr, "\n-output_aln_threshold [100]");
1159 fprintf ( stderr, "\n-output_aln_file [stdout]");
1160 fprintf ( stderr, "\n-output_aln_format [clustalw]");
1161 fprintf ( stderr, "\n-output_aln_modif [lower]");
1162 fprintf ( stderr, "\n");
1163 myexit (EXIT_SUCCESS);
1166 /*********************************COPYRIGHT NOTICE**********************************/
1167 /*© Centro de Regulacio Genomica */
1169 /*Cedric Notredame */
1170 /*Tue Jul 1 10:00:54 WEST 2008. */
1171 /*All rights reserved.*/
1172 /*This file is part of T-COFFEE.*/
1174 /* T-COFFEE is free software; you can redistribute it and/or modify*/
1175 /* it under the terms of the GNU General Public License as published by*/
1176 /* the Free Software Foundation; either version 2 of the License, or*/
1177 /* (at your option) any later version.*/
1179 /* T-COFFEE is distributed in the hope that it will be useful,*/
1180 /* but WITHOUT ANY WARRANTY; without even the implied warranty of*/
1181 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/
1182 /* GNU General Public License for more details.*/
1184 /* You should have received a copy of the GNU General Public License*/
1185 /* along with Foobar; if not, write to the Free Software*/
1186 /* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/
1187 /*............................................... |*/
1188 /* If you need some more information*/
1189 /* cedric.notredame@europe.com*/
1190 /*............................................... |*/
1194 /*********************************COPYRIGHT NOTICE**********************************/
1195 /*********************************COPYRIGHT NOTICE**********************************/
1196 /*© Centro de Regulacio Genomica */
1198 /*Cedric Notredame */
1199 /*Tue Oct 27 10:12:26 WEST 2009. */
1200 /*All rights reserved.*/
1201 /*This file is part of T-COFFEE.*/
1203 /* T-COFFEE is free software; you can redistribute it and/or modify*/
1204 /* it under the terms of the GNU General Public License as published by*/
1205 /* the Free Software Foundation; either version 2 of the License, or*/
1206 /* (at your option) any later version.*/
1208 /* T-COFFEE is distributed in the hope that it will be useful,*/
1209 /* but WITHOUT ANY WARRANTY; without even the implied warranty of*/
1210 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/
1211 /* GNU General Public License for more details.*/
1213 /* You should have received a copy of the GNU General Public License*/
1214 /* along with Foobar; if not, write to the Free Software*/
1215 /* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/
1216 /*............................................... |*/
1217 /* If you need some more information*/
1218 /* cedric.notredame@europe.com*/
1219 /*............................................... |*/
1223 /*********************************COPYRIGHT NOTICE**********************************/