2 Distances of Secondary Structures
3 Walter Fontana, Ivo L Hofacker, Peter F Stadler
12 #include "dist_vars.h"
13 #include "RNAstruct.h"
15 #include "stringdist.h"
17 #include "data_structures.h"
18 #include "RNAdistance_cmdl.h"
20 #define MAXNUM 1000 /* max number of structs for distance matrix */
23 #define PRIVATE static
25 static char rcsid[] = "$Id: RNAdistance.c,v 1.8 2005/07/24 08:35:15 ivo Exp $";
26 PRIVATE void command_line(int argc, char *argv[]);
27 PRIVATE int parse_input(char *line);
28 PRIVATE int check_tree(char *line, char alpha[]);
29 PRIVATE int check_brackets(char *line);
30 PRIVATE void print_aligned_lines(FILE *somewhere);
32 PRIVATE char ruler[] ="....,....1....,....2....,....3....,....4"
33 "....,....5....,....6....,....7....,....8";
36 PRIVATE int taxa_list;
37 PRIVATE char outfile[FILENAME_MAX_LENGTH], *list_title;
39 PRIVATE char ttype[10]="f";
42 int main(int argc, char *argv[])
44 char *line=NULL, *xstruc, *cc;
46 int tree_types = 0, ttree;
47 swString *S[10][MAXNUM];
48 char *P[MAXNUM]; /* structures for base pair distances */
49 int string_types = 0, tstr;
50 int i,j, tt, istty, type;
54 command_line(argc, argv);
56 if((outfile[0]=='\0')&&(task==2)&&(edit_backtrack))
57 strcpy(outfile,"backtrack.file");
58 if(outfile[0]!='\0') somewhere = fopen(outfile,"w");
59 if (somewhere==NULL) somewhere = stdout;
60 istty = isatty(fileno(stdin))&&isatty(fileno(stdout));
63 if ((istty)&&(n==0)) {
64 printf("\nInput structure; @ to quit\n");
65 printf("%s\n", ruler);
68 if (line!=NULL) free(line);
70 } while ((type=parse_input(line))==0);
72 if (((type==999)||(type==888))&&(task==2)) { /* do matrices */
74 printf("* END of taxa list\n");
77 for (tt=0; tt< types; tt++) {
78 printf("> %c %d\n", ttype[tt], n);
79 if(islower(ttype[tt])) {
82 printf("%g ",tree_edit_distance(T[ttree][i], T[ttree][j]));
84 fprintf(somewhere,"%d %d",i+1,j+1);
85 if (ttype[tt]=='f') unexpand_aligned_F(aligned_line);
86 print_aligned_lines(somewhere);
92 for (i=0; i<n; i++) free_tree(T[ttree][i]);
98 printf("%g ", (float) bp_distance(P[i], P[j]));
102 for (i=0; i<n; i++) free(P[i]);
104 else if(isupper(ttype[tt])) {
105 for (i=1; i<n; i++) {
106 for (j=0; j<i; j++) {
107 printf("%g ",string_edit_distance(S[tstr][i], S[tstr][j]));
108 if (edit_backtrack) {
109 fprintf(somewhere,"%d %d",i+1,j+1);
110 if (ttype[tt]=='F') unexpand_aligned_F(aligned_line);
111 print_aligned_lines(somewhere);
117 for (i=0; i<n; i++) free(S[tstr][i]);
122 if (type==888) { /* do another distance matrix */
124 printf("%s\n", list_title);
128 } /* END do matrices */
129 if (type==999) { /* finito */
130 if (outfile[0]!='\0') fclose(somewhere);
135 xstruc = add_root(line);
141 xstruc = unexpand_Full(line);
148 for(tt=0; tt < types; tt++) {
152 if (type!=1) nrerror("Can't convert back to full structure");
153 xstruc = expand_Full(line);
154 if(islower(ttype[tt])) { /* tree_edit */
155 T[tree_types++][n] = make_tree(xstruc);
157 if(isupper(ttype[tt])) { /* string edit */
158 S[string_types++][n] = Make_swString(xstruc);
163 if (type!=1) nrerror("Can't convert back to full structure");
170 xstruc = b2HIT(line);
171 if(islower(ttype[tt])) {
172 T[tree_types++][n] = make_tree(xstruc);
174 if(isupper(ttype[tt])) {
175 S[string_types++][n] = Make_swString(xstruc);
180 nrerror("Can't convert to HIT structure");
188 xstruc = expand_Shapiro(cc);
192 cc = expand_Shapiro(line);
193 xstruc = unweight(cc);
197 xstruc = unweight(line);
200 nrerror("Unknown structure representation");
203 if(islower(ttype[tt])) {
204 T[tree_types++][n] = make_tree(xstruc);
206 if(isupper(ttype[tt])) {
207 S[string_types++][n] = Make_swString(xstruc);
214 xstruc = b2Shapiro(line);
215 if(islower(ttype[tt])) {
216 T[tree_types++][n] = make_tree(xstruc);
218 if(isupper(ttype[tt])) {
219 S[string_types++][n] = Make_swString(xstruc);
224 if(islower(ttype[tt])) {
225 T[tree_types++][n] = make_tree(line);
227 if(isupper(ttype[tt])) {
228 S[string_types++][n] = Make_swString(line);
233 nrerror("Unknown distance type");
241 for (it=0, is=0, i=0; i<types; i++) {
242 if(islower(ttype[i])) {
243 dist = tree_edit_distance(T[it][0], T[it][1]);
244 free_tree(T[it][0]); free_tree(T[it][1]);
247 else if (ttype[i]=='P') {
248 dist = (float) bp_distance(P[0], P[1]);
249 free(P[0]); free(P[1]);
251 else /* isupper(ttype[i]) */ {
252 dist = string_edit_distance(S[is][0], S[is][1]);
253 free(S[is][0]); free(S[is][1]);
256 printf("%c: %g ", ttype[i], dist);
257 if ((edit_backtrack)&&(ttype[i]!='P')) {
258 if (ttype[i]=='f') unexpand_aligned_F(aligned_line);
259 print_aligned_lines(somewhere);
268 for (it=0, is=0, i=0; i<types; i++) {
269 if(islower(ttype[i])) {
270 dist = tree_edit_distance(T[it][1], T[it][0]);
274 else if (ttype[i]=='P') {
275 dist = (float) bp_distance(P[0], P[1]);
278 else /* if(isupper(ttype[i])) */ {
279 dist = string_edit_distance(S[is][0], S[is][1]);
283 printf("%c: %g ", ttype[i], dist);
284 if ((edit_backtrack)&&(ttype[i]!='P')) {
285 if (ttype[i]=='f') unexpand_aligned_F(aligned_line);
286 print_aligned_lines(somewhere);
295 for (it=0, is=0, i=0; i<types; i++) {
296 if(islower(ttype[i])) {
297 dist = tree_edit_distance(T[it][1], T[it][0]);
302 else if (ttype[i]=='P') {
303 dist = (float) bp_distance(P[0], P[1]);
304 free(P[0]); P[0] = P[1];
306 else /* if(isupper(ttype[i])) */ {
307 dist = string_edit_distance(S[is][0], S[is][1]);
312 printf("%c: %g ", ttype[i], dist);
313 if ((edit_backtrack)&&(ttype[i]!='P')) {
314 if (ttype[i]=='f') unexpand_aligned_F(aligned_line);
315 print_aligned_lines(somewhere);
328 /*--------------------------------------------------------------------------*/
330 PRIVATE int parse_input(char *line)
332 int type, rooted=0, i, xx;
335 if (line==NULL) return 999;
338 if (task==2) taxa_list=1;
339 printf("%s\n", line);
342 list_title = strdup(line);
348 printf("%d :%s\n", n+1, line+1);
349 else printf("%s\n", line);
353 cp = strchr(line,' ');
354 if (cp) *cp='\0'; /* get rid of junk at the end of line */
361 case '(' : /* it's a tree */
364 type = 4; /* coarse */
371 if ( (line[i]=='U')||(line[i]=='P') ) {
372 type = 2; /* FFull */
379 break; /* Shapiro tree */
381 if ((line[i]!='(')&&(line[i]!=')')) xx = 1;
386 rooted = (line[strlen(line)-2]=='R');
397 if(check_brackets(line))
401 if(check_tree(line,"UP") ) {
407 if(check_tree(line,"HBIMSE") ){
408 if(rooted) return -3;
413 if(check_tree(line,"HBIM") ){
422 /*--------------------------------------------------------------------------*/
425 PRIVATE int check_tree(char *line, char alpha[])
430 n = (int) strlen(line);
432 if( line[0] != '(' ) return 0;
435 while( line[i] == '(' ){
439 pos=strchr(alpha, (int)line[i]);
441 while (isdigit((int) line[++i]));
442 if (line[i]!=')') return 0;
446 if ((i!=n-1)||(line[i]!=')')) return 0;
451 if( (i<n)&&(line[i+1]==')') ) return 0;
460 /*--------------------------------------------------------------------------*/
463 PRIVATE int check_brackets(char *line)
490 /*--------------------------------------------------------------------------*/
493 PRIVATE void command_line(int argc, char *argv[])
496 struct RNAdistance_args_info args_info;
500 types=1; ttype[0]='f'; task=1;
503 #############################################
504 # check the command line parameters
505 #############################################
507 if(RNAdistance_cmdline_parser (argc, argv, &args_info) != 0) exit(1);
509 /* use specified distance representations */
510 if(args_info.distance_given){
511 strncpy(ttype, args_info.distance_arg,9);
512 types = (int)strlen(ttype);
515 if(args_info.compare_given){
516 switch(args_info.compare_arg[0]){
517 case 'p': task=1; break;
518 case 'm': task=2; break;
519 case 'f': task=3; break;
520 case 'c': task=4; break;
521 default: RNAdistance_cmdline_parser_print_help();
526 if(args_info.shapiro_given)
529 if(args_info.backtrack_given){
530 if(strcmp(args_info.backtrack_arg, "none")){
531 strncpy(outfile, args_info.backtrack_arg, FILENAME_MAX_LENGTH-1);
536 /* free allocated memory of command line data structure */
537 RNAdistance_cmdline_parser_free (&args_info);
540 /*--------------------------------------------------------------------------*/
542 PRIVATE void print_aligned_lines(FILE *somewhere)
544 if (edit_backtrack) {
545 fprintf(somewhere, "\n%s\n%s\n", aligned_line[0], aligned_line[1]);