8 #include "io_lib_header.h"
9 #include "util_lib_header.h"
10 #include "dp_lib_header.h"
11 #include "define_header.h"
14 int aln_compare ( int argc, char *argv[])
20 Sequence *SA, *SB, *TOT_SEQ=NULL;
21 Sequence *defined_residueA;
22 Sequence *defined_residueB;
36 char compare_mode[STRING];
38 char *alignment1_file;
39 char *alignment2_file;
44 char io_format[STRING];
55 int output_aln_threshold;
56 char output_aln_file[LONG_STRING];
57 char output_aln_format[LONG_STRING];
58 char output_aln_modif[LONG_STRING];
60 Constraint_list *CL_A;
61 Constraint_list *CL_B;
67 /*Column Comparison Variables*/
85 int *n_sub_categories;
90 int **aln_output_count;
96 char sim_matrix[STRING];
100 char *sim_category_list;
101 int *sim_n_sub_categories;
104 if ( argc==1|| strm6 ( argv[1], "h", "-h", "help", "-help", "-man", "?"))
106 output_informations();
109 argv=standard_initialisation (argv, &argc);
110 /*Declarations and Initializations*/
111 alignment1_file=vcalloc ( LONG_STRING, sizeof (char));
112 alignment2_file=vcalloc ( LONG_STRING, sizeof (char));
114 pep1_file=vcalloc ( LONG_STRING, sizeof (char));
115 pep2_file=vcalloc ( LONG_STRING, sizeof (char));
118 sprintf (compare_mode, "sp");
124 grep_list=vcalloc ( STRING, sizeof (char**));
125 for ( a=0; a< STRING; a++)grep_list[a]=declare_char (3, STRING);
129 struct_file=declare_char ( MAX_N_STRUC, LONG_STRING);
130 struct_format=declare_char (MAX_N_STRUC, STRING);
132 n_symbol=vcalloc ( MAX_N_STRUC, sizeof (int));
133 symbol_list=vcalloc (MAX_N_STRUC, sizeof (char**));
134 for ( a=0; a< MAX_N_STRUC; a++)symbol_list[a]=declare_char ( 100, 100);
140 category=vcalloc ( MAX_N_CATEGORIES, sizeof (char**));
141 for ( a=0; a< MAX_N_CATEGORIES; a++)category[a]=declare_char(100, STRING);
142 n_sub_categories=vcalloc ( 100, sizeof (int));
143 category_list=vcalloc ( LONG_STRING, sizeof (char));
144 sprintf ( category_list, "[*][*]=[ALL]");
148 sim_category=vcalloc ( MAX_N_CATEGORIES, sizeof (char**));
149 for ( a=0; a< MAX_N_CATEGORIES; a++)sim_category[a]=declare_char(100, STRING);
150 sim_n_sub_categories=vcalloc ( 100, sizeof (int));
151 sim_category_list=vcalloc ( LONG_STRING, sizeof (char));
152 sprintf ( sim_category_list, "[*][*]=[ALL]");
153 sprintf ( sim_aln, "al1");
155 sprintf ( sim_category_list, "[*][*]=[ALL]");
157 sprintf ( io_format, "ht");
158 sprintf ( io_file, "stdout");
163 output_aln_threshold=100;
164 sprintf ( output_aln_file, "stdout");
165 sprintf ( output_aln_format, "clustalw");
166 sprintf ( output_aln_modif, "lower");
167 /*END OF INITIALIZATION*/
174 for ( a=1; a< argc; a++)
176 if (strcmp ( argv[a], "-f")==0)
178 sprintf (io_file,"%s", argv[++a]);
180 else if ( strcmp ( argv[a], "-sim_aln")==0)
182 sprintf (sim_aln,"%s", argv[++a]);
184 else if ( strcmp ( argv[a], "-sim_matrix")==0)
186 sprintf (sim_matrix,"%s", argv[++a]);
188 else if ( strm ( argv[a], "-compare_mode"))
190 sprintf ( compare_mode, "%s", argv[++a]);
192 else if ( strcmp ( argv[a], "-sim_cat")==0)
194 if ( argv[++a][0]!='[')
196 if ( strcmp ( argv[a], "3d_ali")==0)sprintf ( sim_category_list, "[b][b]+[h][h]=[struc]");
199 fprintf ( stderr, "\n%s: Unknown category for distance measure", argv[a]);
205 sprintf ( sim_category_list, "%s", argv[a]);
208 else if ( strcmp ( argv[a], "-grep_value")==0)
210 sprintf ( grep_list[n_greps][0], "%s", argv[++a]);
211 sprintf ( grep_list[n_greps][1], "%s", argv[++a]);
215 else if ( strcmp ( argv[a], "-al1")==0)
218 sprintf ( alignment1_file, "%s", argv[++a]);
221 else if ( strcmp ( argv[a], "-al2")==0)
224 sprintf ( alignment2_file, "%s", argv[++a]);
227 else if ( strcmp ( argv[a], "-pep1")==0)
230 sprintf ( pep1_file, "%s", argv[++a]);
232 else if ( strcmp ( argv[a], "-pep2")==0)
235 sprintf ( pep2_file, "%s", argv[++a]);
237 else if ( strcmp ( argv[a], "-pep")==0)
241 else if ( strcmp ( argv[a], "-count")==0)
245 else if ( strcmp ( argv[a], "-output_aln")==0)
249 else if ( strcmp ( argv[a], "-output_aln_threshold")==0)
251 output_aln_threshold=atoi(argv[++a]);
253 else if ( strcmp ( argv[a], "-output_aln_file")==0)
255 sprintf (output_aln_file,"%s",argv[++a]);
257 else if ( strcmp ( argv[a], "-output_aln_format")==0)
259 sprintf (output_aln_format,"%s",argv[++a]);
261 else if ( strcmp ( argv[a], "-output_aln_modif")==0)
263 sprintf (output_aln_modif,"%s",argv[++a]);
265 else if ( strcmp ( argv[a], "-st")==0)
267 sprintf ( struct_file [n_structure], "%s", argv[++a]);
268 if (!NEXT_ARG_IS_FLAG && is_a_struc_format (argv[a+1]))
269 sprintf ( struct_format[n_structure], "%s", argv[++a]);
271 sprintf ( struct_format[n_structure], "%s", "pep");
273 if ( !NEXT_ARG_IS_FLAG && strcmp ( argv[a+1], "conv")==0)
276 while(!NEXT_ARG_IS_FLAG)
278 sprintf ( symbol_list[n_structure][n_symbol[n_structure]], "%s", argv[++a]);
279 n_symbol[n_structure]++;
282 else if (!NEXT_ARG_IS_FLAG)
284 symbol_list[n_structure]=make_symbols ( argv[++a], &n_symbol[n_structure]);
289 symbol_list[n_structure]=make_symbols ( "any", &n_symbol[n_structure]);
295 else if ( strcmp (argv[a], "-sep")==0)
297 if ( !NEXT_ARG_IS_FLAG)
298 get_separating_char ( argv[++a][0], &sep_l, &sep_r);
302 else if ( strncmp ( argv[a], "-io_format",5)==0)
304 sprintf ( io_format, "%s", argv[++a]);
306 else if ( strcmp ( argv[a], "-io_cat")==0)
308 if ( argv[++a][0]!='[')
310 if ( strcmp ( argv[a], "3d_ali")==0)sprintf ( category_list, "[b][b]+[h][h]=[struc];[*][*]=[tot]");
314 sprintf ( category_list, "%s", argv[a]);
319 fprintf ( stdout, "\nOPTION %s UNKNOWN[FATAL]\n", argv[a]);
320 myexit (EXIT_FAILURE);
324 /*PARAMETER PROCESSING*/
326 if ( pep_compare==1 || count==1)aln_compare=0;
327 if ( aln_compare==1)pep_compare=0;
329 /*READ THE TOTAL SEQUENCES*/
330 seq_list=declare_char ( 100,STRING);
333 if ( alignment1_file[0] && !check_file_exists ( alignment1_file))
335 fprintf (stderr, "\nERROR: %s DOES NOT EXIST[FATAL:%s]\n", alignment1_file, PROGRAM);
336 myexit(EXIT_FAILURE);
338 if ( alignment2_file[0] && !check_file_exists ( alignment2_file))
340 fprintf (stderr, "\nERROR: %s DOES NOT EXIST[FATAL:%s]\n", alignment2_file, PROGRAM);
341 myexit(EXIT_FAILURE);
343 if ( pep1_file[0] && !check_file_exists ( pep1_file))
345 fprintf (stderr, "\nERROR: %s DOES NOT EXIST[FATAL:%s]\n", pep1_file, PROGRAM);
346 myexit(EXIT_FAILURE);
348 if ( pep2_file[0] && !check_file_exists ( pep2_file))
350 fprintf (stderr, "\nERROR: %s DOES NOT EXIST[FATAL:%s]\n", pep2_file, PROGRAM);
351 myexit(EXIT_FAILURE);
354 if ( alignment1_file[0])sprintf ( seq_list[n_seq_file++], "A%s", alignment1_file);
355 if ( alignment2_file[0])sprintf ( seq_list[n_seq_file++], "A%s", alignment2_file);
356 if ( pep1_file[0])sprintf ( seq_list[n_seq_file++], "S%s", pep1_file);
357 if ( pep2_file[0])sprintf ( seq_list[n_seq_file++], "S%s", pep2_file);
361 TOT_SEQ=read_seq_in_n_list ( seq_list, n_seq_file, NULL, NULL);
363 A=declare_aln (TOT_SEQ);
364 B=declare_aln (TOT_SEQ);
367 /*1 COMPARISON OF THE SEQUENCES*/
368 if ( pep_compare==1 || count==1)
372 if ( pep1_file[0]!='\0')SA=main_read_seq (pep1_file);
373 else if (alignment1_file[0]!='\0')
375 main_read_aln ( alignment1_file, A);
380 main_read_aln ("stdin", A);
381 sprintf ( alignment1_file, "stdin");
384 if ( pep2_file[0]!='\0')SB=main_read_seq (pep2_file);
385 else if (alignment2_file[0]!='\0')
387 main_read_aln ( alignment2_file, B);
394 sprintf ( alignment2_file, "%s", alignment1_file );
396 buf1=(pep1_file[0]!='\0')?pep1_file: alignment1_file;
397 buf2=(pep2_file[0]!='\0')?pep2_file: alignment2_file;
398 /*Output of the Results*/
400 fp=vfopen ( io_file, "w");
405 fprintf (fp, "Number of seq: %d %d\n", SA->nseq,SA->nseq);
406 for ( a=0; a< SA->nseq; a++) fprintf (fp, "%-15s %d %d\n", SA->name[a], (int)strlen (SA->seq[a]), (int)strlen (SA->seq[a]));
409 if (SA->nseq!=SB->nseq)
412 fprintf ( fp, "DIFFERENCE TYPE 1: Different number of sequences %3d/%3d",SA->nseq,SB->nseq);
417 for ( a=0; a< SA->nseq; a++)
419 lower_string (SA->seq[a]);
420 lower_string (SB->seq[a]);
424 if ( strcmp ( SA->seq[a], SB->seq[a])!=0)
426 fprintf ( fp, "DIFFERENCE TYPE 2: %s is different in the 2 files\n", SA->name[a]);
430 for ( a=0; a< SA->nseq; a++)
432 lower_string (SA->seq[a]);
433 lower_string (SB->seq[a]);
437 if ( strlen ( SA->seq[a])!= strlen (SB->seq[a]))
439 fprintf ( fp, "DIFFERENCE TYPE 3: %s has != length in the 2 files (%d-%d)\n", SA->name[a],(int)strlen ( SA->seq[a]), (int)strlen (SB->seq[a]));
445 fprintf ( fp, "\nDIFFERENCES found between:\n\t%s\n\t%s\n**********\n\n",buf1, buf2);
450 /*2 COMPARISON OF THE ALIGNMENTS*/
451 else if ( aln_compare==1)
454 n_categories=parse_category_list ( category_list, category, n_sub_categories);
455 sim_n_categories=parse_category_list ( sim_category_list, sim_category, sim_n_sub_categories);
457 main_read_aln ( alignment1_file, A);
458 main_read_aln ( alignment2_file, B);
459 CLS=trim_aln_seq ( A, B);
462 defined_residueA=get_defined_residues (A);
463 defined_residueB=get_defined_residues (B);
466 A=thread_defined_residues_on_aln(A, defined_residueA);
467 A=thread_defined_residues_on_aln(A, defined_residueB);
468 B=thread_defined_residues_on_aln(B, defined_residueA);
469 B=thread_defined_residues_on_aln(B, defined_residueB);
472 CL_A=declare_constraint_list ( CLS, NULL, NULL, 0, NULL, NULL);
473 CL_B=declare_constraint_list ( CLS, NULL, NULL, 0, NULL, NULL);
476 CL_A=aln2constraint_list (A,CL_A, "sim");
477 CL_B=aln2constraint_list (B,CL_B, "sim");
481 glob=vcalloc ( A->nseq+1, sizeof (int));
482 pw_glob=declare_int ( A->nseq+1, A->nseq+1);
485 if ( strm( compare_mode, "sp"))
488 while (entry=extract_entry (CL_A))
499 if ((main_search_in_list_constraint (entry,&pos_in_clist,4,CL_B))!=NULL)
500 add_entry2list (entry,CL_A);
504 else if ( strm( compare_mode, "column") )
507 posA=aln2pos_simple_2(A);
508 posB=aln2pos_simple_2(B);
509 seq_cache=declare_int ( A->nseq, A->len_aln+1);
510 for ( a=0; a< A->len_aln; a++)
511 for ( b=0; b<B->len_aln; b++)
513 is_same=compare_pos_column(posA, a, posB, b, A->nseq);
516 for (c=0; c< A->nseq;c++)
520 seq_cache[c][posA[c][a]]=1;
526 while (entry=extract_entry (CL_A))
538 if (seq_cache[s1][r1])
541 add_entry2list (entry, CL_A);
546 free_int (seq_cache, -1);
548 else if ( strm( compare_mode, "tc") )
550 correct_column = vcalloc(A->len_aln+1, sizeof (int));
551 posA=aln2pos_simple_2(A);
552 posB=aln2pos_simple_2(B);
553 for ( a=0; a< A->len_aln; a++)
554 for ( b=0; b<B->len_aln; b++)
556 is_same = compare_pos_column(posA, a, posB, b, A->nseq);
558 correct_column[a] = is_same;
563 for ( a=0; a< n_structure; a++)
565 ST=read_structure (struct_file[a],struct_format[a], A,B,ST,n_symbol[a], symbol_list[a]);
568 /*RESULT ARRAY DECLARATION*/
570 tot_count=declare_int (n_categories+1, A->nseq+1);
571 pos_count=declare_int (n_categories+1, A->nseq+1);
572 pw_tot_count=vcalloc ( A->nseq, sizeof (int**));
573 for ( a=0; a< A->nseq; a++)pw_tot_count[a]=declare_int ( A->nseq, n_categories);
576 pw_pos_count=vcalloc ( A->nseq, sizeof (int**));
577 for ( a=0; a< A->nseq; a++)pw_pos_count[a]=declare_int ( A->nseq, n_categories);
579 /*COMPARISON MODULE*/
580 if (strm( compare_mode, "tc") )
582 int column_is_structure;
583 int pair_is_structure;
585 for ( a=0; a< n_categories; a++)
587 for(b = 0; b < A->len_aln; b++)
589 column_is_structure = 1;
590 pair_is_structure = 0;
591 for (s1=0; s1< A->nseq-1; s1++)
593 for(s2=s1+1; s2 < A->nseq; s2++)
595 if ((posA[s1][b]>0) && (posA[s2][b]>0))
597 pair_is_structure=is_in_struct_category(s1,s2,posA[s1][b],posA[s2][b],ST,category[a], n_sub_categories[a]);
598 column_is_structure = (column_is_structure && pair_is_structure);
599 if(pair_is_structure)
601 tot_count[a][s1+1]++;
602 tot_count[a][s2+1]++;
603 pw_tot_count[s1][s2][a]++;
604 pw_tot_count[s2][s1][a]++;
605 if (correct_column[b])
607 pw_pos_count[s1][s2][a]++;
608 pw_pos_count[s2][s1][a]++;
609 pos_count[a][s1+1]++;
610 pos_count[a][s2+1]++;
616 if(column_is_structure && pair_is_structure)
619 if(correct_column[b])
630 /*COMPARISON MODULE*/
632 for ( a=0; a< n_categories; a++)
634 while (entry=extract_entry (CL_A))
641 if ( is_in_struct_category ( s1, s2, r1, r2, ST, category[a], n_sub_categories[a]))
644 tot_count[a][s1+1]++;
645 tot_count[a][s2+1]++;
646 pw_tot_count[s1][s2][a]++;
647 pw_tot_count[s2][s1][a]++;
650 pw_pos_count[s1][s2][a]++;
651 pw_pos_count[s2][s1][a]++;
653 pos_count[a][s1+1]++;
654 pos_count[a][s2+1]++;
660 // printf("pos=%d,tot=%d\n", pos_count[0][0], tot_count[0][0]);
661 /*Measure of Aligned Sequences Similarity*/
663 sim=get_aln_compare_sim ((strcmp (sim_aln, "al1")==0)?A:B, ST,sim_category[0], sim_n_sub_categories[0], sim_matrix);
664 sim_param=analyse_sim ((strcmp (sim_aln, "al1")==0)?A:B, sim);
666 /*Fill the Result_structure*/
667 R=vcalloc ( 1, sizeof (Result));
669 R->grep_list=grep_list;
677 R->alignment1_file=alignment1_file;
678 R->alignment2_file=alignment2_file;
679 R->io_format=io_format;
680 R->n_structure=n_structure;
681 R->struct_file=struct_file;
682 R->struct_format=struct_format;
683 R->n_symbol=n_symbol;
684 R->symbol_list=symbol_list;
687 R->tot_count=tot_count;
688 R->pos_count=pos_count;
689 R->pw_tot_count=pw_tot_count;
690 R->pw_pos_count=pw_pos_count;
693 R->n_categories=n_categories;
694 R->category=category;
695 R->category_list=category_list;
696 R->n_sub_categories=n_sub_categories;
698 R->sim_param=sim_param;
699 R->sim_matrix=sim_matrix;
700 R->sim_n_categories=sim_n_categories;
701 R->sim_category=sim_category;
702 R->sim_category_list=sim_category_list;
703 R->sim_n_sub_categories=sim_n_sub_categories;
707 /*Output of the Results*/
709 fp=vfopen ( io_file, "w");
710 fp=output_format (io_format, fp, R);
714 /*Rewriting of Alignment A*/
719 aln_output_tot =declare_int ( A->nseq, A->len_aln+1);
720 aln_output_count=declare_int ( A->nseq, A->len_aln+1);
722 while (entry=extract_entry (CL_A))
724 aln_output_tot[entry[SEQ1]][entry[R1]]++;
725 aln_output_tot[entry[SEQ2]][entry[R2]]++;
727 aln_output_count[entry[SEQ1]][entry[R1]]+=entry[MISC];
728 aln_output_count[entry[SEQ2]][entry[R2]]+=entry[MISC];
731 for ( a=0; a< A->nseq; a++)
734 for (c=0, b=0; b< A->len_aln; b++)
736 if ( !is_gap(A->seq_al[a][b]))
739 if ( aln_output_tot[a][c] && ((aln_output_count[a][c]*100)/aln_output_tot[a][c])<output_aln_threshold)
741 if ( strm (output_aln_modif, "lower"))
742 A->seq_al[a][b]=tolower(A->seq_al[a][b]);
744 A->seq_al[a][b]=output_aln_modif[0];
746 else A->seq_al[a][b]=toupper(A->seq_al[a][b]);
751 A->score_aln=(int)(R->tot_count[0][0]==0)?0:((R->pos_count[0][0]*100)/(float)R->tot_count[0][0]);
753 output_format_aln (output_aln_format,A,NULL,output_aln_file);
755 free_int ( aln_output_tot, -1);
756 free_int ( aln_output_count, -1 );
761 /************************************************************************************/
766 /************************************************************************************/
767 FILE *output_format (char *iof,FILE *fp, Result *R)
776 t: total results (global);
781 for ( a=0; a< l; a++)
783 if ( iof[a]=='H')fp=output_large_header (fp,R);
784 else if ( iof[a]=='h')fp=output_header (fp,R);
785 else if ( iof[a]=='t')fp=output_total_results (fp, R);
786 else if ( iof[a]=='s')fp=output_sequence_results (fp,R);
787 else if ( iof[a]=='p')fp=output_pair_wise_sequence_results(fp,R);
792 FILE *output_pair_wise_sequence_results (FILE *fp, Result *R)
797 for ( c=0; c<(R->A)->nseq-1; c++)
799 for ( d=c+1; d< (R->A)->nseq; d++)
801 fprintf (fp, "%-10s %-10s%s",(R->A)->name[c],(R->A)->name[d],SSPACE);
802 fprintf (fp, "%5.1f%s", R->sim[c][d], SSPACE);
804 for (a=0; a< R->n_categories; a++)
806 fprintf ( fp, "%5.1f ",(R->pw_tot_count[c][d][a]==0)?0:((float)(R->pw_pos_count[c][d][a]*100)/(float)R->pw_tot_count[c][d][a]));
807 fprintf ( fp, "%c%5.1f%c%s",(R->sep_l),(R->pw_glob[c][d]==0)?0:((float)(R->pw_tot_count[c][d][a]*100)/(float)R->pw_glob[c][d]),(R->sep_r),SSPACE);
809 fprintf ( fp, "%c%5d%c\n",(R->sep_l), R->pw_glob[c][d],(R->sep_r));
815 FILE *output_sequence_results (FILE *fp, Result *R)
819 for ( c=1; c<=R->A->nseq; c++)
821 fprintf (fp, "%-10s %-10s%s",(R->A)->name[c-1], "..",SSPACE);
822 fprintf (fp, "%5.1f%s", R->sim_param[c-1][0],SSPACE);
823 for (a=0; a< R->n_categories; a++)
825 fprintf ( fp, "%5.1f ",(R->tot_count[a][c]==0)?0:((float)(R->pos_count[a][c]*100)/(float)R->tot_count[a][c]));
826 fprintf ( fp, "%c%5.1f%c%s",(R->sep_l),(R->glob[c]==0)?0:((float)(R->tot_count[a][c]*100)/(float)R->glob[c]),(R->sep_r), SSPACE);
828 fprintf ( fp, "%c%5d%c\n",(R->sep_l), R->glob[c],(R->sep_r));
834 FILE *output_total_results (FILE *fp, Result *R)
839 fprintf ( fp, "%-13s %-7d%s",extract_suffixe (R->alignment1_file),(R->A)->nseq, SSPACE);
840 fprintf (fp, "%5.1f%s", R->sim_param[(R->A)->nseq][0], SSPACE);
841 for (a=0; a< R->n_categories; a++)
843 fprintf ( fp, "%5.1f ",(R->tot_count[a][0]==0)?0:((float)(R->pos_count[a][0]*100)/(float)R->tot_count[a][0]));
844 fprintf ( fp, "%c%5.1f%c%s",(R->sep_l),(R->glob[0]==0)?0:((float)(R->tot_count[a][0]*100)/(float)R->glob[0]),(R->sep_r), SSPACE);
846 fprintf ( fp, "%c%5d%c\n",(R->sep_l), R->glob[0],(R->sep_r));
849 FILE *output_header (FILE *fp, Result *R)
854 fprintf ( fp, "%s\n",generate_string ( R->n_categories*(13+strlen(SSPACE))+31+2*strlen(SSPACE),'*'));
856 fprintf ( fp, "%-10s %-10s %s%-3s%s", "seq1", "seq2",SSPACE,"Sim",SSPACE);
858 for ( a=0; a< R->n_categories; a++)
859 fprintf ( fp, "%-12s%s ",R->category[a][0], SSPACE);
860 fprintf (fp, "%-5s", "Tot");
864 FILE *output_large_header ( FILE *fp, Result *R)
868 fprintf ( fp, "AL1: %s\n", R->alignment1_file);
869 fprintf ( fp, "AL2: %s\n", R->alignment2_file);
870 for ( a=0; a< R->n_structure; a++)
872 fprintf (fp, "ST %d: %s [%s]/[", a, R->struct_file[a], R->struct_format[a]);
873 for ( b=0; b< R->n_symbol[a]; b++)fprintf (fp, "%s ", R->symbol_list[a][b]);
874 fprintf ( fp, "]\n");
879 void get_separating_char ( char s, char *l, char *r)
881 if ( s=='{' || s=='}')
887 else if ( s==']' || s=='[')
893 else if ( s==')' || s=='(')
907 /************************************************************************************/
912 /************************************************************************************/
913 float **get_aln_compare_sim ( Alignment *A, Structure *ST, char **cat, int n_cat, char *matrix)
923 sim=declare_float ( A->nseq, A->nseq);
925 for ( a=0; a<A->nseq-1; a++)
927 for (b=a+1; b< A->nseq; b++)
929 for ( r1=0, r2=0,tot=0, pos=0,c=0; c< A->len_aln; c++)
931 p1=is_gap(A->seq_al[a][c]);
932 p2=is_gap(A->seq_al[b][c]);
940 if (is_in_struct_category (a, b, r1, r2, ST, cat, n_cat))
943 pos+=is_in_same_group_aa ( cr1, cr2, 0, NULL, matrix);
947 sim[a][b]=sim[b][a]=(tot==0)?0:((pos*100)/tot);
952 float **analyse_sim ( Alignment *A, float **sim)
959 an=declare_float ( A->nseq+1,2);
961 for (d=0, a=0; a< A->nseq;a++)
963 for ( b=0; b< A->nseq; b++)
968 an[A->nseq][0]+=sim[a][b];
972 an[a][0]=((float)an[a][0]/(float)(A->nseq-1));
976 an[A->nseq][0]=an[A->nseq][0]/d;
978 for ( d=0,a=0; a< A->nseq; a++)
980 for ( b=0; b< A->nseq; b++)
984 c=an[a][0]-sim[a][b];
985 an[a][1]+=(c>0)?c:-c;
986 an[A->nseq][1]+=(c>0)?c:-c;
990 an[a][1]=((float)an[a][1]/(float)(A->nseq-1));
993 an[A->nseq][1]=an[A->nseq][1]/d;
1005 /************************************************************************************/
1010 /************************************************************************************/
1011 int is_in_struct_category ( int s1, int s2, int r1, int r2, Structure *ST, char **cat, int n_cat)
1014 static char *struc_r1;
1015 static char *struc_r2;
1017 char second[STRING];
1023 if ( ST==NULL)return 1;
1024 if ( struc_r1!=NULL)
1030 if ( r==NULL)r=declare_int (2, 2);
1031 else r[0][0]=r[1][1]=r[1][0]=r[0][1]=0;
1034 struc_r1=get_structure_residue ( s1, r1, ST);
1035 struc_r2=get_structure_residue ( s2, r2, ST);
1038 for ( a=1; a< n_cat; a+=2)
1040 sprintf ( first, "%s", cat[a]);
1041 sprintf ( second,"%s", cat[a+1]);
1042 r[0][0]=struc_matches_pattern ( struc_r1, first);
1043 r[0][1]=struc_matches_pattern ( struc_r2, first);
1044 r[1][0]=struc_matches_pattern ( struc_r1, second);
1045 r[1][1]=struc_matches_pattern ( struc_r2, second);
1047 if ( (r[0][0]&&r[1][1])||(r[1][0]&&r[0][1]))return 1;
1052 char * get_structure_residue (int seq, int res, Structure *ST)
1057 s=vcalloc ( ST->n_fields+1, sizeof (char));
1058 for (a=0; a< ST->n_fields; a++)
1059 s[a]=ST->struc[seq][res][a];
1064 int struc_matches_pattern ( char *struc, char *pattern)
1072 sprintf ( p, "%s", pattern);
1074 if ( strcmp (p, "*")==0)return 1;
1080 for ( b=0; b<l; b++)
1083 if ( strcmp ( y, "*")==0);
1084 else if ( strchr(y, struc[b])==NULL)return 0;
1085 y=strtok ( NULL, ".");
1087 if ( y==NULL && b<(l-1))
1089 crash ( "\nA Structure is Missing[FATAL]");
1098 Structure * read_structure (char *fname, char *format, Alignment *A,Alignment *B, Structure *ST, int n_symbols, char **symbol_table)
1101 Alignment *StrucAln;
1102 Sequence *SEQ=NULL, *SA=NULL;
1107 if (ST==NULL)ST=declare_structure (A->nseq, A->seq_al);
1109 ST=extend_structure ( ST);
1111 StrucAln=declare_Alignment(NULL);
1112 if (strm ( format, "pep"))
1114 SEQ=main_read_seq ( fname);
1115 StrucAln=seq2aln(SEQ,StrucAln,0);
1118 else if ( strcmp ( format, "aln")==0)
1120 StrucAln=main_read_aln (fname, StrucAln);
1123 reorder_aln(StrucAln, A->name, A->nseq);
1125 SA=aln2seq(StrucAln);
1126 string_array_convert (SA->seq, SA->nseq, n_symbols, symbol_table);
1130 if (SEQ)free_sequence (SEQ, SEQ->nseq);
1135 int parse_category_list ( char *category_list_in, char ***category, int*n_sub_categories)
1138 char *category_list;
1140 category_list=vcalloc ( strlen(category_list_in)+1, sizeof (char));
1141 sprintf (category_list, "%s", category_list_in);
1145 while ((y=strtok(z, ";"))!=NULL)
1147 sprintf ( category[n++][2], "%s", y);
1152 for ( a=0; a< n; a++)
1154 sprintf (category_list,"%s",strtok(category[a][2], "="));
1155 sprintf (category[a][0],"%s",strtok (NULL, "="));
1156 sprintf ( category[a][++n_sub_categories[a]],"%s", strtok(category_list, "[]+"));
1157 while ( ((y=strtok(NULL, "[]+"))!=NULL))
1159 if ( strcmp (y, "#")==0)y=category[a][n_sub_categories[a]];
1160 sprintf ( category[a][++n_sub_categories[a]],"%s",y);
1167 int is_a_struc_format (char *format)
1169 if ( strcmp ( format, "pep")==0)return 1;
1170 if ( strcmp ( format, "aln")==0)return 1;
1173 /************************************************************************************/
1178 /************************************************************************************/
1179 void output_informations ()
1181 fprintf ( stderr, "\nPROGRAM: %s (%s)\n",PROGRAM,VERSION);
1182 fprintf ( stderr, "******INPUT***************************");
1183 fprintf ( stderr, "\n-al1 al1_file");
1184 fprintf ( stderr, "\n-al2 al2_file");
1185 fprintf ( stderr, "\n-compare_mode [sp] or column");
1186 fprintf ( stderr, "\n-pep (compare only the sequences");
1187 fprintf ( stderr, "\n-count");
1188 fprintf ( stderr, "\n-pep1 pep1_file");
1189 fprintf ( stderr, "\n-pep1 pep2_file");
1190 fprintf ( stderr, "\n-st str_file st_format conversion");
1191 fprintf ( stderr, "\n **st_format: aln, pep");
1192 fprintf ( stderr, "\n **conversion: 3d_ali, conv abcZ #X");
1193 fprintf ( stderr, "\nNOTE: Several structures in a row are possible");
1195 fprintf ( stderr, "\n\n****DISTANCE MEASURE*****************");
1196 fprintf ( stderr, "\n-sim_cat category_format or category_name");
1197 fprintf ( stderr, "\n **category_format: [*][*]=[tot]");
1198 fprintf ( stderr, "\n **category_name : 3d_ali (<=>[h][h]+[e][e]=[Struc]");
1199 fprintf ( stderr, "\n-sim_matrix matrix_name");
1200 fprintf ( stderr, "\n **matrix_name: idmat,pam250mt..");
1201 fprintf ( stderr, "\n-sim_aln al1 or al2");
1202 fprintf ( stderr, "\n\n****COMPARISON***********************");
1203 fprintf ( stderr, "\n-io_cat category_format or category_name");
1204 fprintf ( stderr, "\n **category_format: [*][*]=[tot]");
1205 fprintf ( stderr, "\n **category_name : 3d_ali(<=>[h][h]+[e][e]=[Struc];[*][*]=[Tot]");
1206 fprintf ( stderr, "\n\nNOTE: if two structures:");
1207 fprintf ( stderr, "\n[he.123][#]=[he123VShe123];[beh.*][he.2345]=[other]");
1208 fprintf ( stderr, "\n\n****OUTPUT****************************");
1209 fprintf ( stderr, "\n-f stdout");
1210 fprintf ( stderr, "\n stderr");
1211 fprintf ( stderr, "\n file_name");
1212 fprintf ( stderr, "\n-io_format hts");
1213 fprintf ( stderr, "\n H ->large Header");
1214 fprintf ( stderr, "\n h ->small Header");
1215 fprintf ( stderr, "\n t->global (average)results");
1216 fprintf ( stderr, "\n s ->average results for each sequence");
1217 fprintf ( stderr, "\n p ->results for each pair of sequences");
1218 fprintf ( stderr, "\n-output_aln Outputs al1 with conserved bits in Upper");
1219 fprintf ( stderr, "\n-output_aln_threshold [100]");
1220 fprintf ( stderr, "\n-output_aln_file [stdout]");
1221 fprintf ( stderr, "\n-output_aln_format [clustalw]");
1222 fprintf ( stderr, "\n-output_aln_modif [lower]");
1223 fprintf ( stderr, "\n");
1224 myexit (EXIT_SUCCESS);
1227 /*********************************COPYRIGHT NOTICE**********************************/
1228 /*� Centro de Regulacio Genomica */
1230 /*Cedric Notredame */
1231 /*Tue Jul 1 10:00:54 WEST 2008. */
1232 /*All rights reserved.*/
1233 /*This file is part of T-COFFEE.*/
1235 /* T-COFFEE is free software; you can redistribute it and/or modify*/
1236 /* it under the terms of the GNU General Public License as published by*/
1237 /* the Free Software Foundation; either version 2 of the License, or*/
1238 /* (at your option) any later version.*/
1240 /* T-COFFEE is distributed in the hope that it will be useful,*/
1241 /* but WITHOUT ANY WARRANTY; without even the implied warranty of*/
1242 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/
1243 /* GNU General Public License for more details.*/
1245 /* You should have received a copy of the GNU General Public License*/
1246 /* along with Foobar; if not, write to the Free Software*/
1247 /* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/
1248 /*............................................... |*/
1249 /* If you need some more information*/
1250 /* cedric.notredame@europe.com*/
1251 /*............................................... |*/
1255 /*********************************COPYRIGHT NOTICE**********************************/
1256 /******************************COPYRIGHT NOTICE*******************************/
1257 /*© Centro de Regulacio Genomica */
1259 /*Cedric Notredame */
1260 /*Fri Feb 18 08:27:45 CET 2011 - Revision 596. */
1261 /*All rights reserved.*/
1262 /*This file is part of T-COFFEE.*/
1264 /* T-COFFEE is free software; you can redistribute it and/or modify*/
1265 /* it under the terms of the GNU General Public License as published by*/
1266 /* the Free Software Foundation; either version 2 of the License, or*/
1267 /* (at your option) any later version.*/
1269 /* T-COFFEE is distributed in the hope that it will be useful,*/
1270 /* but WITHOUT ANY WARRANTY; without even the implied warranty of*/
1271 /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the*/
1272 /* GNU General Public License for more details.*/
1274 /* You should have received a copy of the GNU General Public License*/
1275 /* along with Foobar; if not, write to the Free Software*/
1276 /* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA*/
1277 /*............................................... |*/
1278 /* If you need some more information*/
1279 /* cedric.notredame@europe.com*/
1280 /*............................................... |*/
1284 /******************************COPYRIGHT NOTICE*******************************/