--- /dev/null
+/*
+ Distances of Secondary Structures
+ Walter Fontana, Ivo L Hofacker, Peter F Stadler
+ Vienna RNA Package
+*/
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <ctype.h>
+#include <unistd.h>
+#include <string.h>
+#include "dist_vars.h"
+#include "RNAstruct.h"
+#include "treedist.h"
+#include "stringdist.h"
+#include "utils.h"
+#include "data_structures.h"
+#include "RNAdistance_cmdl.h"
+
+#define MAXNUM 1000 /* max number of structs for distance matrix */
+
+#define PUBLIC
+#define PRIVATE static
+/*@unused@*/
+static char rcsid[] = "$Id: RNAdistance.c,v 1.8 2005/07/24 08:35:15 ivo Exp $";
+PRIVATE void command_line(int argc, char *argv[]);
+PRIVATE int parse_input(char *line);
+PRIVATE int check_tree(char *line, char alpha[]);
+PRIVATE int check_brackets(char *line);
+PRIVATE void print_aligned_lines(FILE *somewhere);
+
+PRIVATE char ruler[] ="....,....1....,....2....,....3....,....4"
+"....,....5....,....6....,....7....,....8";
+PRIVATE int types=1;
+PRIVATE int task;
+PRIVATE int taxa_list;
+PRIVATE char outfile[FILENAME_MAX_LENGTH], *list_title;
+
+PRIVATE char ttype[10]="f";
+PRIVATE int n=0;
+
+int main(int argc, char *argv[])
+{
+ char *line=NULL, *xstruc, *cc;
+ Tree *T[10][MAXNUM];
+ int tree_types = 0, ttree;
+ swString *S[10][MAXNUM];
+ char *P[MAXNUM]; /* structures for base pair distances */
+ int string_types = 0, tstr;
+ int i,j, tt, istty, type;
+ int it, is;
+ FILE *somewhere=NULL;
+
+ command_line(argc, argv);
+
+ if((outfile[0]=='\0')&&(task==2)&&(edit_backtrack))
+ strcpy(outfile,"backtrack.file");
+ if(outfile[0]!='\0') somewhere = fopen(outfile,"w");
+ if (somewhere==NULL) somewhere = stdout;
+ istty = isatty(fileno(stdin))&&isatty(fileno(stdout));
+
+ do {
+ if ((istty)&&(n==0)) {
+ printf("\nInput structure; @ to quit\n");
+ printf("%s\n", ruler);
+ }
+ do {
+ if (line!=NULL) free(line);
+ line=get_line(stdin);
+ } while ((type=parse_input(line))==0);
+
+ if (((type==999)||(type==888))&&(task==2)) { /* do matrices */
+ if (taxa_list)
+ printf("* END of taxa list\n");
+
+ ttree = 0; tstr = 0;
+ for (tt=0; tt< types; tt++) {
+ printf("> %c %d\n", ttype[tt], n);
+ if(islower(ttype[tt])) {
+ for (i=1; i<n; i++) {
+ for (j=0; j<i; j++) {
+ printf("%g ",tree_edit_distance(T[ttree][i], T[ttree][j]));
+ if(edit_backtrack) {
+ fprintf(somewhere,"%d %d",i+1,j+1);
+ if (ttype[tt]=='f') unexpand_aligned_F(aligned_line);
+ print_aligned_lines(somewhere);
+ }
+ }
+ printf("\n");
+ }
+ printf("\n");
+ for (i=0; i<n; i++) free_tree(T[ttree][i]);
+ ttree++;
+ }
+ if (ttype[tt]=='P') {
+ for (i=1; i<n; i++) {
+ for (j=0; j<i; j++)
+ printf("%g ", (float) bp_distance(P[i], P[j]));
+ printf("\n");
+ }
+ printf("\n");
+ for (i=0; i<n; i++) free(P[i]);
+ }
+ else if(isupper(ttype[tt])) {
+ for (i=1; i<n; i++) {
+ for (j=0; j<i; j++) {
+ printf("%g ",string_edit_distance(S[tstr][i], S[tstr][j]));
+ if (edit_backtrack) {
+ fprintf(somewhere,"%d %d",i+1,j+1);
+ if (ttype[tt]=='F') unexpand_aligned_F(aligned_line);
+ print_aligned_lines(somewhere);
+ }
+ }
+ printf("\n");
+ }
+ printf("\n");
+ for (i=0; i<n; i++) free(S[tstr][i]);
+ tstr++;
+ }
+ }
+ fflush(stdout);
+ if (type==888) { /* do another distance matrix */
+ n = 0;
+ printf("%s\n", list_title);
+ free(list_title);
+ continue;
+ }
+ } /* END do matrices */
+ if (type==999) { /* finito */
+ if (outfile[0]!='\0') fclose(somewhere);
+ return 0;
+ }
+
+ if (type<0) {
+ xstruc = add_root(line);
+ free(line);
+ line = xstruc;
+ type = -type;
+ }
+ if (type==2) {
+ xstruc = unexpand_Full(line);
+ free(line);
+ line = xstruc;
+ type=1;
+ }
+ tree_types = 0;
+ string_types = 0;
+ for(tt=0; tt < types; tt++) {
+ switch(ttype[tt]){
+ case 'f' :
+ case 'F' :
+ if (type!=1) nrerror("Can't convert back to full structure");
+ xstruc = expand_Full(line);
+ if(islower(ttype[tt])) { /* tree_edit */
+ T[tree_types++][n] = make_tree(xstruc);
+ }
+ if(isupper(ttype[tt])) { /* string edit */
+ S[string_types++][n] = Make_swString(xstruc);
+ }
+ free(xstruc);
+ break;
+ case 'P':
+ if (type!=1) nrerror("Can't convert back to full structure");
+ P[n] = strdup(line);
+ break;
+ case 'h' :
+ case 'H' :
+ switch (type) {
+ case 1:
+ xstruc = b2HIT(line);
+ if(islower(ttype[tt])) {
+ T[tree_types++][n] = make_tree(xstruc);
+ }
+ if(isupper(ttype[tt])) {
+ S[string_types++][n] = Make_swString(xstruc);
+ }
+ free(xstruc);
+ break;
+ default:
+ nrerror("Can't convert to HIT structure");
+ }
+ break;
+ case 'c' :
+ case 'C' :
+ switch (type) {
+ case 1:
+ cc = b2C(line);
+ xstruc = expand_Shapiro(cc);
+ free(cc);
+ break;
+ case 4:
+ cc = expand_Shapiro(line);
+ xstruc = unweight(cc);
+ free(cc);
+ break;
+ case 3:
+ xstruc = unweight(line);
+ break;
+ default:
+ nrerror("Unknown structure representation");
+ exit(0);
+ }
+ if(islower(ttype[tt])) {
+ T[tree_types++][n] = make_tree(xstruc);
+ }
+ if(isupper(ttype[tt])) {
+ S[string_types++][n] = Make_swString(xstruc);
+ }
+ free(xstruc);
+ break;
+ case 'w' :
+ case 'W' :
+ if (type==1) {
+ xstruc = b2Shapiro(line);
+ if(islower(ttype[tt])) {
+ T[tree_types++][n] = make_tree(xstruc);
+ }
+ if(isupper(ttype[tt])) {
+ S[string_types++][n] = Make_swString(xstruc);
+ }
+ free(xstruc);
+ }
+ else {
+ if(islower(ttype[tt])) {
+ T[tree_types++][n] = make_tree(line);
+ }
+ if(isupper(ttype[tt])) {
+ S[string_types++][n] = Make_swString(line);
+ }
+ }
+ break;
+ default:
+ nrerror("Unknown distance type");
+ }
+ }
+ n++;
+ switch (task) {
+ float dist;
+ case 1:
+ if (n==2) {
+ for (it=0, is=0, i=0; i<types; i++) {
+ if(islower(ttype[i])) {
+ dist = tree_edit_distance(T[it][0], T[it][1]);
+ free_tree(T[it][0]); free_tree(T[it][1]);
+ it++;
+ }
+ else if (ttype[i]=='P') {
+ dist = (float) bp_distance(P[0], P[1]);
+ free(P[0]); free(P[1]);
+ }
+ else /* isupper(ttype[i]) */ {
+ dist = string_edit_distance(S[is][0], S[is][1]);
+ free(S[is][0]); free(S[is][1]);
+ is++;
+ }
+ printf("%c: %g ", ttype[i], dist);
+ if ((edit_backtrack)&&(ttype[i]!='P')) {
+ if (ttype[i]=='f') unexpand_aligned_F(aligned_line);
+ print_aligned_lines(somewhere);
+ }
+ }
+ printf("\n");
+ n=0;
+ }
+ break;
+ case 3:
+ if (n>1) {
+ for (it=0, is=0, i=0; i<types; i++) {
+ if(islower(ttype[i])) {
+ dist = tree_edit_distance(T[it][1], T[it][0]);
+ free_tree(T[it][1]);
+ it++;
+ }
+ else if (ttype[i]=='P') {
+ dist = (float) bp_distance(P[0], P[1]);
+ free(P[1]);
+ }
+ else /* if(isupper(ttype[i])) */ {
+ dist = string_edit_distance(S[is][0], S[is][1]);
+ free(S[is][1]);
+ is++;
+ }
+ printf("%c: %g ", ttype[i], dist);
+ if ((edit_backtrack)&&(ttype[i]!='P')) {
+ if (ttype[i]=='f') unexpand_aligned_F(aligned_line);
+ print_aligned_lines(somewhere);
+ }
+ }
+ printf("\n");
+ n=1;
+ }
+ break;
+ case 4:
+ if (n>1) {
+ for (it=0, is=0, i=0; i<types; i++) {
+ if(islower(ttype[i])) {
+ dist = tree_edit_distance(T[it][1], T[it][0]);
+ free_tree(T[it][0]);
+ T[it][0] = T[it][1];
+ it++;
+ }
+ else if (ttype[i]=='P') {
+ dist = (float) bp_distance(P[0], P[1]);
+ free(P[0]); P[0] = P[1];
+ }
+ else /* if(isupper(ttype[i])) */ {
+ dist = string_edit_distance(S[is][0], S[is][1]);
+ free(S[is][0]);
+ S[is][0] = S[is][1];
+ is++;
+ }
+ printf("%c: %g ", ttype[i], dist);
+ if ((edit_backtrack)&&(ttype[i]!='P')) {
+ if (ttype[i]=='f') unexpand_aligned_F(aligned_line);
+ print_aligned_lines(somewhere);
+ }
+ }
+ printf("\n");
+ n=1;
+ }
+ break;
+ }
+ fflush(stdout);
+ } while(type!=999);
+ return 0;
+}
+
+/*--------------------------------------------------------------------------*/
+
+PRIVATE int parse_input(char *line)
+{
+ int type, rooted=0, i, xx;
+ char *cp;
+
+ if (line==NULL) return 999;
+ if (line[0]=='*') {
+ if (taxa_list==0) {
+ if (task==2) taxa_list=1;
+ printf("%s\n", line);
+ return 0;
+ } else {
+ list_title = strdup(line);
+ return 888;
+ }
+ }
+ if (line[0]=='>') {
+ if (taxa_list)
+ printf("%d :%s\n", n+1, line+1);
+ else printf("%s\n", line);
+ return 0;
+ }
+
+ cp = strchr(line,' ');
+ if (cp) *cp='\0'; /* get rid of junk at the end of line */
+
+ switch (line[0]) {
+ case '.' :
+ type = 1;
+ break;
+
+ case '(' : /* it's a tree */
+ i=1;
+ xx = 0;
+ type = 4; /* coarse */
+ rooted = 0;
+ while (line[i]) {
+ if (line[i]=='.'){
+ type = 1; /* Full */
+ break;
+ }
+ if ( (line[i]=='U')||(line[i]=='P') ) {
+ type = 2; /* FFull */
+ xx = 1;
+ break;
+ }
+ if (line[i]=='S') {
+ type = 3;
+ xx = 1;
+ break; /* Shapiro tree */
+ }
+ if ((line[i]!='(')&&(line[i]!=')')) xx = 1;
+ i++;
+ }
+ if(!xx) type =1;
+
+ rooted = (line[strlen(line)-2]=='R');
+ break;
+ case '@' :
+ return 999;
+
+ default:
+ return 0;
+ }
+
+ switch (type) {
+ case 1:
+ if(check_brackets(line))
+ return 1;
+ break;
+ case 2:
+ if(check_tree(line,"UP") ) {
+ if(rooted) return 2;
+ else return -2;
+ }
+ break;
+ case 3:
+ if(check_tree(line,"HBIMSE") ){
+ if(rooted) return -3;
+ else return -3;
+ }
+ break;
+ case 4:
+ if(check_tree(line,"HBIM") ){
+ if(rooted) return 4;
+ else return -4;
+ }
+ break;
+ }
+ return 0;
+}
+
+/*--------------------------------------------------------------------------*/
+
+
+PRIVATE int check_tree(char *line, char alpha[])
+{
+ int n, i, o;
+ char *pos;
+
+ n = (int) strlen(line);
+
+ if( line[0] != '(' ) return 0;
+ i=o=1;
+ while( line[i] ){
+ while( line[i] == '(' ){
+ o++;
+ i++;
+ }
+ pos=strchr(alpha, (int)line[i]);
+ if (pos) {
+ while (isdigit((int) line[++i]));
+ if (line[i]!=')') return 0;
+ }
+ if (line[i]=='R') {
+ i++;
+ if ((i!=n-1)||(line[i]!=')')) return 0;
+ }
+ if (line[i]==')') {
+ o--;
+ if(o< 0) return 0;
+ if( (i<n)&&(line[i+1]==')') ) return 0;
+ i++;
+ }
+ else return 0;
+ }
+ if(o>0) return 0;
+ return 1;
+}
+
+/*--------------------------------------------------------------------------*/
+
+
+PRIVATE int check_brackets(char *line)
+{
+ int i,o;
+
+ i=o=0;
+ while( line[i] ){
+ switch(line[i]) {
+ case '(' :
+ o++;
+ i++;
+ break;
+ case '.' :
+ i++;
+ break;
+ case ')' :
+ i++;
+ o--;
+ if(o<0) return 0;
+ break;
+ default:
+ return 0;
+ }
+ }
+ if (o>0) return 0;
+ return 1;
+}
+
+/*--------------------------------------------------------------------------*/
+
+
+PRIVATE void command_line(int argc, char *argv[])
+{
+ int i;
+ struct RNAdistance_args_info args_info;
+
+ /* default values */
+ edit_backtrack = 0;
+ types=1; ttype[0]='f'; task=1;
+
+ /*
+ #############################################
+ # check the command line parameters
+ #############################################
+ */
+ if(RNAdistance_cmdline_parser (argc, argv, &args_info) != 0) exit(1);
+
+ /* use specified distance representations */
+ if(args_info.distance_given){
+ strncpy(ttype, args_info.distance_arg,9);
+ types = (int)strlen(ttype);
+ }
+
+ if(args_info.compare_given){
+ switch(args_info.compare_arg[0]){
+ case 'p': task=1; break;
+ case 'm': task=2; break;
+ case 'f': task=3; break;
+ case 'c': task=4; break;
+ default: RNAdistance_cmdline_parser_print_help();
+ exit(EXIT_FAILURE);
+ }
+ }
+
+ if(args_info.shapiro_given)
+ cost_matrix = 1;
+
+ if(args_info.backtrack_given){
+ if(strcmp(args_info.backtrack_arg, "none")){
+ strncpy(outfile, args_info.backtrack_arg, FILENAME_MAX_LENGTH-1);
+ }
+ edit_backtrack = 1;
+ }
+
+ /* free allocated memory of command line data structure */
+ RNAdistance_cmdline_parser_free (&args_info);
+}
+
+/*--------------------------------------------------------------------------*/
+
+PRIVATE void print_aligned_lines(FILE *somewhere)
+{
+ if (edit_backtrack) {
+ fprintf(somewhere, "\n%s\n%s\n", aligned_line[0], aligned_line[1]);
+ fflush(somewhere);
+ }
+}