--- /dev/null
+/*
+ parse and convert secondary structures
+ Walter Fontana, Ivo L Hofacker, Peter F Stadler
+ Vienna RNA Package
+*/
+/* Last changed Time-stamp: <2005-07-23 10:12:19 ivo> */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include "utils.h"
+#include "RNAstruct.h"
+
+#define PRIVATE static
+#define PUBLIC
+
+#define MAXLEN 10000
+
+static char rcsid[] = "$Id: RNAstruct.c,v 1.8 2006/05/09 20:44:33 ivo Exp $";
+
+
+PRIVATE char *aux_struct(const char *structure);
+
+/* on return from parse_structure(), b2C() or b2Shapiro() ... */
+PUBLIC int loop_size[STRUC]; /* contains loop sizes of a structure */
+PUBLIC int helix_size[STRUC]; /* contains helix sizes of a structure */
+PUBLIC int loop_degree[STRUC]; /* contains loop degrees of a structure */
+PUBLIC int loops; /* n of loops and stacks in a structure */
+PUBLIC int unpaired, pairs; /* n of unpaired digits and pairs */
+
+/*---------------------------------------------------------------------------*/
+
+PRIVATE char *aux_struct(const char* structure )
+{
+ short *match_paren;
+ int i, o, p;
+ char *string;
+
+ string = (char *) space(sizeof(char)*(strlen(structure)+1));
+ match_paren = (short *) space(sizeof(short)*(strlen(structure)/2+1));
+ strcpy(string, structure);
+
+ i = o = 0;
+ while (string[i]) {
+ switch (string[i]) {
+ case '.': break;
+ case '(':
+ match_paren[++o]=i;
+ break;
+ case ')':
+ p=i;
+ while ((string[p+1]==')')&&(match_paren[o-1]==match_paren[o]-1)) {
+ p++; o--;
+ }
+ string[p]=']';
+ i=p;
+ string[match_paren[o]]='[';
+ o--;
+ break;
+ default:
+ nrerror("Junk in structure at aux_structure\n");
+ }
+ i++;
+ }
+ free(match_paren);
+ return(string);
+}
+
+/*---------------------------------------------------------------------------*/
+
+PUBLIC char *b2HIT(const char *structure)
+{
+
+ int i, u, p, l;
+ char *string, *temp, *HIT, tt[10];
+
+ temp = (char *) space(strlen(structure)*4+4);
+ string = aux_struct( structure );
+
+ strcpy(temp,"(");
+ i=p=u=0; l=1;
+ while (string[i]) {
+ switch(string[i]) {
+ case '.':
+ u++; break;
+ case '[':
+ if (u>0) {
+ sprintf(tt, "(U%d)" , u);
+ strcat(temp+l, tt);
+ l+=strlen(tt);
+ u=0;
+ }
+ strcat(temp+l, "("); l++;
+ break;
+ case ')':
+ if (u>0) {
+ sprintf(tt, "(U%d)" , u);
+ strcat(temp+l, tt);
+ l+=strlen(tt);
+ u=0;
+ }
+ p++;
+ break;
+ case ']':
+ if (u>0) {
+ sprintf(tt, "(U%d)" , u);
+ strcat(temp+l, tt);
+ l+=strlen(tt);
+ u=0;
+ }
+ sprintf(tt,"P%d)", p+1);
+ strcat(temp+l, tt);
+ l+=strlen(tt);
+ p=0;
+ break;
+ }
+ i++;
+ }
+ if (u>0) {
+ sprintf(tt, "(U%d)" , u);
+ strcat(temp+l, tt);
+ l+=strlen(tt);
+ }
+ strcat(temp+l, "R)");
+
+ free( string );
+
+ HIT = (char *) space(sizeof(char)*(strlen(temp)+2));
+ strcpy(HIT, temp);
+ free(temp);
+ return(HIT);
+}
+
+/*---------------------------------------------------------------------------*/
+
+PUBLIC char *b2C(const char *structure )
+{
+ short *bulge, *loop;
+
+ int i, lp, p, l;
+ char *string, *Coarse, *temp;
+
+ bulge = (short *) space(sizeof(short)*(strlen(structure)/3+1));
+ loop = (short *) space(sizeof(short)*(strlen(structure)/3+1));
+ temp = (char *) space(4*strlen(structure)+2);
+
+ for (i = 0; i < STRUC; i++) {
+ loop_size[i] = helix_size[i] = 0;
+ }
+ loop_degree[0]=0; /* open structure has degree 0 */
+ pairs = unpaired = loops = lp = 0;
+ loop[0]=0;
+
+ string = aux_struct( structure );
+
+ i=p=l=0;
+ temp[l++] = '(';
+ while (string[i]) {
+ switch(string[i]) {
+ case '.':
+ loop_size[loop[lp]]++;
+ break;
+ case '[':
+ temp[l++]='(';
+ if ((i>0)&&(string[i-1]=='(')) bulge[lp]=1;
+ lp++;
+ loop_degree[++loops]=1;
+ loop[lp]=loops;
+ bulge[lp]=0;
+ break;
+ case ')':
+ if (string[i-1]==']') bulge[lp]=1;
+ p++;
+ break;
+ case ']':
+ if (string[i-1]==']') bulge[lp]=1;
+ switch (loop_degree[loop[lp]]) {
+ case 1: temp[l++]='H'; break; /* hairpin */
+ case 2:
+ if (bulge[lp]==1)
+ temp[l++] = 'B'; /* bulge */
+ else
+ temp[l++] = 'I'; /* internal loop */
+ break;
+ default: temp[l++] = 'M'; /* multiloop */
+ }
+ temp[l++] = ')';
+ pairs+=p+1;
+ p=0;
+ loop_degree[loop[--lp]]++;
+ break;
+ }
+ i++;
+ }
+ temp[l++] = 'R';
+ temp[l++] = ')';
+ temp[l]='\0';
+ free(string);
+ Coarse = (char *) space(sizeof(char)*(strlen(temp)+2));
+ strcpy(Coarse, temp);
+ free(temp);
+ free(bulge); free(loop);
+ return(Coarse);
+}
+
+/*---------------------------------------------------------------------------*/
+
+PUBLIC char *b2Shapiro(const char *structure )
+{
+
+ short *bulge, *loop;
+
+ int i, lp, p, l, k;
+ char *string, *Shapiro, *temp, tt[10];
+
+ bulge = (short *) space(sizeof(short)*(strlen(structure)/3+1));
+ loop = (short *) space(sizeof(short)*(strlen(structure)/3+1));
+ temp = (char *) space(4*strlen(structure)+3);
+
+ for (i = 0; i < STRUC; i++) {
+ loop_size[i] = helix_size[i] = 0;
+ }
+ loop_degree[0]=0; /* open structure has degree 0 */
+ pairs = unpaired = loops = lp = 0;
+ loop[0]=0;
+
+ string = aux_struct( structure );
+
+ i=p=l=0;
+ temp[l++] = '('; /* root */
+ while (string[i]) {
+ switch(string[i]) {
+ case '.':
+ unpaired++;
+ loop_size[loop[lp]]++;
+ break;
+ case '[':
+ temp[l++]='(';
+ temp[l++]='(';
+ if ((i>0)&&(string[i-1]=='(' || string[i-1]=='['))
+ bulge[lp]=1;
+ lp++;
+ loop_degree[++loops]=1;
+ loop[lp]=loops;
+ bulge[lp]=0;
+ break;
+ case ')':
+ if (string[i-1]==']') bulge[lp]=1;
+ p++;
+ break;
+ case ']':
+ if (string[i-1]==']') bulge[lp]=1;
+ switch (loop_degree[loop[lp]]) {
+ case 1: temp[l++]='H'; break; /* hairpin */
+ case 2:
+ if (bulge[lp]==1)
+ temp[l++] = 'B'; /* bulge */
+ else
+ temp[l++] = 'I'; /* internal loop */
+ break;
+ default: temp[l++] = 'M'; /* multiloop */
+ }
+ helix_size[loop[lp]]=p+1;
+
+ sprintf(tt, "%d)" , loop_size[loop[lp]]);
+ for(k=0; k<strlen(tt); k++) temp[l++] = tt[k];
+ sprintf(tt, "S%d)" , helix_size[loop[lp]]);
+ for(k=0; k<strlen(tt); k++) temp[l++] = tt[k];
+
+ pairs+=p+1;
+ p=0;
+ loop_degree[loop[--lp]]++;
+ break;
+ }
+ i++;
+ }
+
+ *tt = '\0';
+ if (loop_size[0]) sprintf(tt, "E%d)" , loop_size[0]);
+ strcat(tt,"R)");
+ temp[l]='\0';
+ strcat(temp, tt);
+ Shapiro = (char *) space(sizeof(char)*(strlen(temp)+2));
+ if (loop_size[0]) {
+ Shapiro[0]='(';
+ strcpy(Shapiro+1, temp);
+ } else strcpy(Shapiro, temp);
+ free(string);
+ free(temp);
+ free(loop); free(bulge);
+ return Shapiro;
+}
+
+
+
+
+/*---------------------------------------------------------------------------*/
+
+PUBLIC void parse_structure(const char *structure)
+
+/*-----------------------------------------------------------------------------
+
+ upon return from parse_structure():
+
+ loops ....................... number of loops or stacks in structure.
+ loop_size[1 <= i <= loops] ..... size of i-th loop.
+ loop_size[0] ................... number of external digits.
+ loop_degree[1 <= i <= loops] ... degree (branches) of i-th loop.
+ loop_degree[0] ................. number of components.
+ helix_size[1 <= i <= loops] .... size of i-th stack.
+ unpaired ....................... n of unpaired digits.
+ pairs .......................... n of base pairs.
+
+-----------------------------------------------------------------------------*/
+
+{
+ short *bulge, *loop;
+
+ int i, lp, p;
+ char *string, *temp;
+
+ temp = (char *) space(strlen(structure)*4+2);
+ bulge = (short *) space(sizeof(short)*(strlen(structure)/3+1));
+ loop = (short *) space(sizeof(short)*(strlen(structure)/3+1));
+
+ for (i = 0; i < STRUC; i++) {
+ loop_size[i] = helix_size[i] = 0;
+ }
+ loop[0] = loop_degree[0]=0; /* open structure has degree 0 */
+ pairs = unpaired = loops = lp = 0;
+ *temp='\0';
+
+ string = aux_struct(structure);
+
+ i=p=0;
+ while (string[i]) {
+ switch(string[i]) {
+ case '.':
+ unpaired++;
+ loop_size[loop[lp]]++;
+ break;
+ case '[':
+ if ((i>0)&&(string[i-1]=='(')) bulge[lp]=1;
+ lp++;
+ loop_degree[++loops]=1;
+ loop[lp]=loops;
+ bulge[lp]=0;
+ break;
+ case ')':
+ if (string[i-1]==']') bulge[lp]=1;
+ p++;
+ break;
+ case ']':
+ if (string[i-1]==']') bulge[lp]=1;
+ helix_size[loop[lp]]=p+1;
+ pairs+=p+1;
+ p=0;
+ loop_degree[loop[--lp]]++;
+ break;
+ }
+ i++;
+ }
+ free(string);
+ free(bulge); free(loop);
+ free(temp);
+}
+
+/*---------------------------------------------------------------------------*/
+
+PUBLIC char *add_root(const char *structure)
+{
+ char *xS;
+ xS = (char *) space(sizeof(char)*(strlen(structure)+4));
+ xS[0] = '(';
+ strcat(xS,structure);
+ strcat(xS,"R)");
+ return xS;
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+PUBLIC char *expand_Shapiro(const char *structure)
+{
+ char *xS, *temp;
+ int i, l;
+
+ temp = (char *) space(4*strlen(structure)+2);
+
+ i = 1;
+ l = 1;
+ temp[0] = '(';
+ while (i<strlen(structure)-1) {
+ temp[l++] = structure[i];
+ if (structure[i] == '(') temp[l++] = '(';
+ else if (structure[i] == ')') {
+ temp[l++] = 'S';
+ temp[l++] = ')';
+ }
+ i++;
+ }
+ temp[l++] = ')';
+ temp[l] = '\0';
+
+ xS = (char *) space(sizeof(char)*(strlen(temp)+1));
+ strcpy(xS, temp);
+ free(temp);
+ return (xS);
+}
+
+/*---------------------------------------------------------------------------*/
+
+PUBLIC char *expand_Full(const char *structure)
+{
+ char *xF, *temp;
+ int i, l;
+
+ temp = (char *) space(4*strlen(structure)+2);
+
+ i = 0;
+ l = 0;
+ while (structure[i]) {
+ if (structure[i] == '(') temp[l++] = '(';
+ else if (structure[i] == ')') {
+ temp[l++] = 'P';
+ temp[l++] = ')';
+ }
+ else {
+ temp[l++] = '(';
+ temp[l++] = 'U';
+ temp[l++] = ')';
+ }
+ i++;
+ }
+ temp[l] = '\0';
+
+ xF = (char *) space(sizeof(char)*(l+5));
+ strcpy(xF, "(");
+ strcat(xF, temp);
+ strcat(xF, "R)");
+ free(temp);
+ return (xF);
+}
+
+/*---------------------------------------------------------------------------*/
+
+PUBLIC char *unexpand_Full(const char *structure)
+{
+ short *match_paren;
+ char id[10], *full, *temp;
+ int i, j, k, l, o, w;
+
+ temp = (char *) space(4*strlen(structure)+2);
+ match_paren = (short *) space(sizeof(short)*(strlen(structure)/2+1));
+
+ i = strlen(structure)-1;
+ l = o = 0; k=9;
+ id[9]='\0';
+ while (i>=0) {
+ switch (structure[i]) {
+ case '(':
+ for (j=0; j<match_paren[o]; j++) temp[l++]='(';
+ match_paren[o--] = 0;
+ break;
+ case 'U':
+ w=1;
+ sscanf(id+k, "%d", &w);
+ for (j=0; j<w; j++) temp[l++]='.';
+ k=9;
+ break;
+ case 'P':
+ w=1;
+ sscanf(id+k, "%d", &w);
+ for (j=0; j<w; j++) temp[l++]=')';
+ match_paren[o]=w;
+ k=9;
+ break;
+ case 'R':
+ break;
+ case ')':
+ o++;
+ break;
+ default:
+ id[--k]=structure[i];
+ }
+ i--;
+ }
+
+ temp[l] = '\0';
+ full = (char *) space(sizeof(char)*(l+1));
+ for (i=0; i<l; i++) full[i]=temp[l-i-1];
+ full[l]='\0';
+ free(temp);
+ free(match_paren);
+ return full;
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+PUBLIC char *unweight(const char *structure)
+{
+ int i,l;
+ char *full, *temp;
+
+ temp = (char *) space(4*strlen(structure)+1);
+
+ i=l=0;
+ while (structure[i]) {
+ if (!isdigit((int)structure[i])) temp[l++]=structure[i];
+ i++;
+ }
+ temp[l]='\0';
+ full = (char *) space(sizeof(char)*(l+1));
+ strcpy(full, temp);
+ free(temp);
+ return full;
+}
+
+/*---------------------------------------------------------------------------*/
+
+PUBLIC void unexpand_aligned_F(char *align[2])
+{
+ char *t0, *t1;
+ int i,l;
+
+ t0 = (char *) space(strlen(align[0])+1);
+ t1 = (char *) space(strlen(align[0])+1);
+
+ for (i=0, l=0; i<strlen(align[0]); i++) {
+ switch (align[0][i]) {
+ case '(':
+ case ')':
+ t0[l] = align[0][i];
+ t1[l++]=align[1][i];
+ break;
+ case 'U':
+ switch (align[1][i]) {
+ case 'U':
+ t0[l-1]=t1[l-1]='.';
+ break;
+ case '_':
+ t0[l-1]='.';
+ t1[l-1]='_';
+ break;
+ case 'P':
+ t0[l-1]='_'; t0[l]='.';
+ t1[l-1]='('; t1[l]=')'; l++;
+ }
+ while (align[0][i]!=')') i++;
+ break;
+ case '_':
+ switch (align[1][i]) {
+ case '(':
+ case ')':
+ t0[l] = align[0][i];
+ t1[l++]=align[1][i];
+ break;
+ case 'U':
+ while (align[1][i]!=')') i++;
+ t1[l-1]='.';
+ t0[l-1]='_';
+ break;
+ }
+ case 'P':
+ if (align[1][i]=='U') {
+ t1[l-1]='_'; t1[l]='.'; t0[l++]=')';
+ while (align[0][i]!=')') i++;
+ }
+ break;
+ }
+ }
+ t0[l-1]=t1[l-1]='\0';
+ strcpy(align[0], t0+1);
+ strcpy(align[1], t1+1);
+ free(t0); free(t1);
+}