WSTester updated to work plus hopefully all the other changes that need to go into...
[jabaws.git] / binaries / src / ViennaRNA / Progs / RNAdistance.c
diff --git a/binaries/src/ViennaRNA/Progs/RNAdistance.c b/binaries/src/ViennaRNA/Progs/RNAdistance.c
new file mode 100644 (file)
index 0000000..ea5dfa4
--- /dev/null
@@ -0,0 +1,548 @@
+/*
+                    Distances of Secondary Structures
+           Walter Fontana, Ivo L Hofacker, Peter F Stadler
+                          Vienna RNA Package
+*/
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <ctype.h>
+#include <unistd.h>
+#include <string.h>
+#include "dist_vars.h"
+#include "RNAstruct.h"
+#include "treedist.h"
+#include "stringdist.h"
+#include "utils.h"
+#include "data_structures.h"
+#include "RNAdistance_cmdl.h"
+
+#define MAXNUM      1000    /* max number of structs for distance matrix */
+
+#define PUBLIC
+#define PRIVATE     static
+/*@unused@*/
+static char rcsid[] = "$Id: RNAdistance.c,v 1.8 2005/07/24 08:35:15 ivo Exp $";
+PRIVATE void command_line(int argc, char *argv[]);
+PRIVATE int parse_input(char *line);
+PRIVATE int check_tree(char *line, char alpha[]);
+PRIVATE int check_brackets(char *line);
+PRIVATE void print_aligned_lines(FILE *somewhere);
+
+PRIVATE char  ruler[] ="....,....1....,....2....,....3....,....4"
+"....,....5....,....6....,....7....,....8";
+PRIVATE int types=1;
+PRIVATE int task;
+PRIVATE int taxa_list;
+PRIVATE char outfile[FILENAME_MAX_LENGTH], *list_title;
+
+PRIVATE char ttype[10]="f";
+PRIVATE int n=0;
+
+int main(int argc, char *argv[])
+{
+  char     *line=NULL, *xstruc, *cc;
+  Tree     *T[10][MAXNUM];
+  int       tree_types = 0, ttree;
+  swString *S[10][MAXNUM];
+  char     *P[MAXNUM];   /* structures for base pair distances */
+  int       string_types = 0, tstr;
+  int       i,j, tt, istty, type;
+  int       it, is;
+  FILE     *somewhere=NULL;
+
+  command_line(argc, argv);
+
+  if((outfile[0]=='\0')&&(task==2)&&(edit_backtrack))
+    strcpy(outfile,"backtrack.file");
+  if(outfile[0]!='\0') somewhere = fopen(outfile,"w");
+  if (somewhere==NULL) somewhere = stdout;
+  istty = isatty(fileno(stdin))&&isatty(fileno(stdout));
+
+  do {
+    if ((istty)&&(n==0)) {
+      printf("\nInput structure;  @ to quit\n");
+      printf("%s\n", ruler);
+    }
+    do {
+      if (line!=NULL) free(line);
+      line=get_line(stdin);
+    } while ((type=parse_input(line))==0);
+
+    if (((type==999)||(type==888))&&(task==2)) {  /* do matrices */
+      if (taxa_list)
+        printf("* END of taxa list\n");
+
+      ttree = 0; tstr = 0;
+      for (tt=0; tt< types; tt++) {
+        printf("> %c   %d\n", ttype[tt], n);
+        if(islower(ttype[tt])) {
+          for (i=1; i<n; i++) {
+            for (j=0; j<i; j++) {
+              printf("%g ",tree_edit_distance(T[ttree][i], T[ttree][j]));
+              if(edit_backtrack) {
+                fprintf(somewhere,"%d %d",i+1,j+1);
+                if (ttype[tt]=='f') unexpand_aligned_F(aligned_line);
+                print_aligned_lines(somewhere);
+              }
+            }
+            printf("\n");
+          }
+          printf("\n");
+          for (i=0; i<n; i++) free_tree(T[ttree][i]);
+          ttree++;
+        }
+        if (ttype[tt]=='P') {
+          for (i=1; i<n; i++) {
+            for (j=0; j<i; j++)
+              printf("%g ", (float) bp_distance(P[i], P[j]));
+            printf("\n");
+          }
+          printf("\n");
+          for (i=0; i<n; i++) free(P[i]);
+        }
+        else if(isupper(ttype[tt])) {
+          for (i=1; i<n; i++) {
+            for (j=0; j<i; j++) {
+              printf("%g ",string_edit_distance(S[tstr][i], S[tstr][j]));
+              if (edit_backtrack) {
+                fprintf(somewhere,"%d %d",i+1,j+1);
+                if (ttype[tt]=='F') unexpand_aligned_F(aligned_line);
+                print_aligned_lines(somewhere);
+              }
+            }
+            printf("\n");
+          }
+          printf("\n");
+          for (i=0; i<n; i++) free(S[tstr][i]);
+          tstr++;
+        }
+      }
+      fflush(stdout);
+      if (type==888) {  /* do another distance matrix */
+        n = 0;
+        printf("%s\n", list_title);
+        free(list_title);
+        continue;
+      }
+    }                       /* END do matrices */
+    if (type==999) {   /* finito */
+      if (outfile[0]!='\0') fclose(somewhere);
+      return 0;
+    }
+
+    if (type<0) {
+      xstruc = add_root(line);
+      free(line);
+      line = xstruc;
+      type = -type;
+    }
+    if (type==2) {
+      xstruc = unexpand_Full(line);
+      free(line);
+      line = xstruc;
+      type=1;
+    }
+    tree_types   = 0;
+    string_types = 0;
+    for(tt=0; tt < types; tt++) {
+      switch(ttype[tt]){
+      case 'f' :
+      case 'F' :
+        if (type!=1) nrerror("Can't convert back to full structure");
+        xstruc = expand_Full(line);
+        if(islower(ttype[tt])) {  /* tree_edit */
+          T[tree_types++][n] = make_tree(xstruc);
+        }
+        if(isupper(ttype[tt])) { /* string edit */
+          S[string_types++][n] = Make_swString(xstruc);
+        }
+        free(xstruc);
+        break;
+      case 'P':
+        if (type!=1) nrerror("Can't convert back to full structure");
+        P[n] = strdup(line);
+        break;
+      case 'h' :
+      case 'H' :
+        switch (type) {
+        case 1:
+          xstruc = b2HIT(line);
+          if(islower(ttype[tt])) {
+            T[tree_types++][n] = make_tree(xstruc);
+          }
+          if(isupper(ttype[tt])) {
+            S[string_types++][n] = Make_swString(xstruc);
+          }
+          free(xstruc);
+          break;
+        default:
+          nrerror("Can't convert to HIT structure");
+        }
+        break;
+      case 'c' :
+      case 'C' :
+        switch (type) {
+        case 1:
+          cc = b2C(line);
+          xstruc = expand_Shapiro(cc);
+          free(cc);
+          break;
+        case 4:
+          cc = expand_Shapiro(line);
+          xstruc = unweight(cc);
+          free(cc);
+          break;
+        case 3:
+          xstruc = unweight(line);
+          break;
+        default:
+          nrerror("Unknown structure representation");
+          exit(0);
+        }
+        if(islower(ttype[tt])) {
+          T[tree_types++][n] = make_tree(xstruc);
+        }
+        if(isupper(ttype[tt])) {
+          S[string_types++][n] = Make_swString(xstruc);
+        }
+        free(xstruc);
+        break;
+      case 'w' :
+      case 'W' :
+        if (type==1) {
+          xstruc = b2Shapiro(line);
+          if(islower(ttype[tt])) {
+            T[tree_types++][n] = make_tree(xstruc);
+          }
+          if(isupper(ttype[tt])) {
+            S[string_types++][n] = Make_swString(xstruc);
+          }
+          free(xstruc);
+        }
+        else {
+          if(islower(ttype[tt])) {
+            T[tree_types++][n] = make_tree(line);
+          }
+          if(isupper(ttype[tt])) {
+            S[string_types++][n] = Make_swString(line);
+          }
+        }
+        break;
+      default:
+        nrerror("Unknown distance type");
+      }
+    }
+    n++;
+    switch (task) {
+      float     dist;
+    case 1:
+      if (n==2) {
+        for (it=0, is=0, i=0; i<types; i++) {
+          if(islower(ttype[i])) {
+            dist = tree_edit_distance(T[it][0], T[it][1]);
+            free_tree(T[it][0]); free_tree(T[it][1]);
+            it++;
+          }
+          else if (ttype[i]=='P') {
+            dist = (float) bp_distance(P[0], P[1]);
+            free(P[0]); free(P[1]);
+          }
+          else /* isupper(ttype[i]) */ {
+            dist = string_edit_distance(S[is][0], S[is][1]);
+            free(S[is][0]); free(S[is][1]);
+            is++;
+          }
+          printf("%c: %g  ", ttype[i], dist);
+          if ((edit_backtrack)&&(ttype[i]!='P')) {
+            if (ttype[i]=='f') unexpand_aligned_F(aligned_line);
+            print_aligned_lines(somewhere);
+          }
+        }
+        printf("\n");
+        n=0;
+      }
+      break;
+    case 3:
+      if (n>1) {
+        for (it=0, is=0, i=0; i<types; i++) {
+          if(islower(ttype[i])) {
+            dist = tree_edit_distance(T[it][1], T[it][0]);
+            free_tree(T[it][1]);
+            it++;
+          }
+          else if (ttype[i]=='P') {
+            dist = (float) bp_distance(P[0], P[1]);
+            free(P[1]);
+          }
+          else /* if(isupper(ttype[i])) */ {
+            dist = string_edit_distance(S[is][0], S[is][1]);
+            free(S[is][1]);
+            is++;
+          }
+          printf("%c: %g  ", ttype[i], dist);
+          if ((edit_backtrack)&&(ttype[i]!='P')) {
+            if (ttype[i]=='f') unexpand_aligned_F(aligned_line);
+            print_aligned_lines(somewhere);
+          }
+        }
+        printf("\n");
+        n=1;
+      }
+      break;
+    case 4:
+      if (n>1) {
+        for (it=0, is=0, i=0; i<types; i++) {
+          if(islower(ttype[i])) {
+            dist = tree_edit_distance(T[it][1], T[it][0]);
+            free_tree(T[it][0]);
+            T[it][0] = T[it][1];
+            it++;
+          }
+          else if (ttype[i]=='P') {
+            dist = (float) bp_distance(P[0], P[1]);
+            free(P[0]); P[0] = P[1];
+          }
+          else /* if(isupper(ttype[i])) */ {
+            dist = string_edit_distance(S[is][0], S[is][1]);
+            free(S[is][0]);
+            S[is][0] = S[is][1];
+            is++;
+          }
+          printf("%c: %g  ", ttype[i], dist);
+          if ((edit_backtrack)&&(ttype[i]!='P')) {
+            if (ttype[i]=='f') unexpand_aligned_F(aligned_line);
+            print_aligned_lines(somewhere);
+          }
+        }
+        printf("\n");
+        n=1;
+      }
+      break;
+    }
+    fflush(stdout);
+  } while(type!=999);
+  return 0;
+}
+
+/*--------------------------------------------------------------------------*/
+
+PRIVATE int parse_input(char *line)
+{
+  int type, rooted=0, i, xx;
+  char *cp;
+
+  if (line==NULL) return 999;
+  if (line[0]=='*') {
+    if (taxa_list==0) {
+      if (task==2) taxa_list=1;
+      printf("%s\n", line);
+      return 0;
+    } else {
+      list_title = strdup(line);
+      return 888;
+    }
+  }
+  if (line[0]=='>') {
+    if (taxa_list)
+      printf("%d :%s\n", n+1, line+1);
+    else printf("%s\n", line);
+    return 0;
+  }
+
+  cp = strchr(line,' ');
+  if (cp) *cp='\0';      /* get rid of junk at the end of line */
+
+  switch (line[0]) {
+  case '.' :
+    type = 1;
+    break;
+
+  case '(' :            /* it's a tree */
+    i=1;
+    xx = 0;
+    type = 4;           /* coarse */
+    rooted = 0;
+    while (line[i])  {
+      if (line[i]=='.'){
+        type = 1;     /* Full */
+        break;
+      }
+      if ( (line[i]=='U')||(line[i]=='P') ) {
+        type = 2;     /* FFull */
+        xx = 1;
+        break;
+      }
+      if (line[i]=='S') {
+        type = 3;
+        xx = 1;
+        break;        /* Shapiro tree */
+      }
+      if ((line[i]!='(')&&(line[i]!=')')) xx = 1;
+      i++;
+    }
+    if(!xx) type =1;
+
+    rooted = (line[strlen(line)-2]=='R');
+    break;
+  case '@' :
+    return 999;
+
+  default:
+    return 0;
+  }
+
+  switch (type) {
+  case 1:
+    if(check_brackets(line))
+      return 1;
+    break;
+  case 2:
+    if(check_tree(line,"UP") ) {
+      if(rooted) return 2;
+      else return -2;
+    }
+    break;
+  case 3:
+    if(check_tree(line,"HBIMSE") ){
+      if(rooted) return -3;
+      else return -3;
+    }
+    break;
+  case 4:
+    if(check_tree(line,"HBIM") ){
+      if(rooted) return 4;
+      else return -4;
+    }
+    break;
+  }
+  return 0;
+}
+
+/*--------------------------------------------------------------------------*/
+
+
+PRIVATE int check_tree(char *line, char alpha[])
+{
+  int n, i, o;
+  char *pos;
+
+  n = (int) strlen(line);
+
+  if( line[0] != '(' ) return 0;
+  i=o=1;
+  while( line[i] ){
+    while( line[i] == '(' ){
+      o++;
+      i++;
+    }
+    pos=strchr(alpha, (int)line[i]);
+    if (pos) {
+      while (isdigit((int) line[++i]));
+      if (line[i]!=')') return 0;
+    }
+    if (line[i]=='R') {
+      i++;
+      if ((i!=n-1)||(line[i]!=')')) return 0;
+    }
+    if (line[i]==')') {
+      o--;
+      if(o< 0) return 0;
+      if( (i<n)&&(line[i+1]==')') ) return 0;
+      i++;
+    }
+    else return 0;
+  }
+  if(o>0) return 0;
+  return 1;
+}
+
+/*--------------------------------------------------------------------------*/
+
+
+PRIVATE int check_brackets(char *line)
+{
+  int i,o;
+
+  i=o=0;
+  while( line[i] ){
+    switch(line[i]) {
+    case '(' :
+      o++;
+      i++;
+      break;
+    case '.' :
+      i++;
+      break;
+    case ')' :
+      i++;
+      o--;
+      if(o<0) return 0;
+      break;
+    default:
+      return 0;
+    }
+  }
+  if (o>0) return 0;
+  return 1;
+}
+
+/*--------------------------------------------------------------------------*/
+
+
+PRIVATE void command_line(int argc, char *argv[])
+{
+  int i;
+  struct        RNAdistance_args_info args_info;
+
+  /* default values */
+  edit_backtrack = 0;
+  types=1; ttype[0]='f'; task=1;
+
+  /*
+  #############################################
+  # check the command line parameters
+  #############################################
+  */
+  if(RNAdistance_cmdline_parser (argc, argv, &args_info) != 0) exit(1);
+
+  /* use specified distance representations */
+  if(args_info.distance_given){
+    strncpy(ttype, args_info.distance_arg,9);
+    types = (int)strlen(ttype);
+  }
+
+  if(args_info.compare_given){
+    switch(args_info.compare_arg[0]){
+      case 'p': task=1; break;
+      case 'm': task=2; break;
+      case 'f': task=3; break;
+      case 'c': task=4; break;
+      default:  RNAdistance_cmdline_parser_print_help();
+                exit(EXIT_FAILURE);
+    }
+  }
+
+  if(args_info.shapiro_given)
+    cost_matrix = 1;
+
+  if(args_info.backtrack_given){
+    if(strcmp(args_info.backtrack_arg, "none")){
+      strncpy(outfile, args_info.backtrack_arg, FILENAME_MAX_LENGTH-1);
+    }
+    edit_backtrack = 1;
+  }
+
+  /* free allocated memory of command line data structure */
+  RNAdistance_cmdline_parser_free (&args_info);
+}
+
+/*--------------------------------------------------------------------------*/
+
+PRIVATE void print_aligned_lines(FILE *somewhere)
+{
+  if (edit_backtrack) {
+    fprintf(somewhere, "\n%s\n%s\n", aligned_line[0], aligned_line[1]);
+    fflush(somewhere);
+  }
+}