2 parse and convert secondary structures
3 Walter Fontana, Ivo L Hofacker, Peter F Stadler
6 /* Last changed Time-stamp: <2005-07-23 10:12:19 ivo> */
14 #include "RNAstruct.h"
16 #define PRIVATE static
21 static char rcsid[] = "$Id: RNAstruct.c,v 1.8 2006/05/09 20:44:33 ivo Exp $";
24 PRIVATE char *aux_struct(const char *structure);
/* Result variables filled in by the parsers of this file. Each array has
   STRUC entries. Per the description inside parse_structure(): index 0 refers
   to the exterior (loop_size[0] = external digits, loop_degree[0] = number of
   components) and indices 1..loops refer to the individual loops/stacks. */
26 /* on return from parse_structure(), b2C() or b2Shapiro() ... */
27 PUBLIC int loop_size[STRUC]; /* contains loop sizes of a structure */
28 PUBLIC int helix_size[STRUC]; /* contains helix sizes of a structure */
29 PUBLIC int loop_degree[STRUC]; /* contains loop degrees of a structure */
30 PUBLIC int loops; /* number of loops and stacks in a structure */
31 PUBLIC int unpaired, pairs; /* number of unpaired digits and of base pairs */
33 /*---------------------------------------------------------------------------*/
/* aux_struct: works on a space()-allocated copy of `structure` in which some
   positions are rewritten (the visible lines write '[' at positions taken from
   match_paren). The copy is presumably what gets returned - confirm against
   the full body.
   NOTE(review): this listing omits lines between the statements shown. */
35 PRIVATE char *aux_struct(const char* structure )
/* copy buffer: one byte per input character plus the terminating NUL */
41 string = (char *) space(sizeof(char)*(strlen(structure)+1));
/* strlen/2+1 shorts, indexed by o; presumably a stack of open-bracket
   positions - TODO confirm. NOTE(review): entries are short, so positions
   above SHRT_MAX cannot be represented. */
42 match_paren = (short *) space(sizeof(short)*(strlen(structure)/2+1));
43 strcpy(string, structure);
/* continue while the next character is ')' and the two topmost match_paren
   entries are adjacent positions (they differ by exactly one) */
54 while ((string[p+1]==')')&&(match_paren[o-1]==match_paren[o]-1)) {
/* mark the position recorded in match_paren[o] with '[' */
59 string[match_paren[o]]='[';
/* error path via nrerror(); presumably taken for unexpected characters -
   confirm against the enclosing switch, which is not shown here */
63 nrerror("Junk in structure at aux_structure\n");
71 /*---------------------------------------------------------------------------*/
/* b2HIT: builds a text representation of `structure` made of "(U<n>)" and
   "P<n>)" tokens and closed by "R)", assembling it in `temp` before sizing the
   result buffer HIT from it.
   NOTE(review): this listing omits lines between the statements shown. */
73 PUBLIC char *b2HIT(const char *structure)
/* tt is a 10-byte scratch buffer for one formatted token */
77 char *string, *temp, *HIT, tt[10];
/* working buffer: four bytes per input character plus four spare */
79 temp = (char *) space(strlen(structure)*4+4);
/* preprocessed copy of the input produced by aux_struct() */
80 string = aux_struct( structure );
/* NOTE(review): "(U%d)" needs 3 characters + digits + NUL, so tt[10] only
   fits values of up to 6 digits; sprintf is unbounded - confirm the range of
   u or switch to snprintf(tt, sizeof tt, ...). Same for the calls below. */
90 sprintf(tt, "(U%d)" , u);
/* append "(" (strcat searches for the terminator from offset l) and advance l
   by the one character added */
95 strcat(temp+l, "("); l++;
99 sprintf(tt, "(U%d)" , u);
108 sprintf(tt, "(U%d)" , u);
/* token carrying p+1; "P%d)" fits tt[10] for values of up to 7 digits */
113 sprintf(tt,"P%d)", p+1);
122 sprintf(tt, "(U%d)" , u);
/* closing token of the representation */
126 strcat(temp+l, "R)");
/* result buffer sized from the assembled text, with two spare bytes */
130 HIT = (char *) space(sizeof(char)*(strlen(temp)+2));
136 /*---------------------------------------------------------------------------*/
/* b2C: derives a coarse representation of `structure` in which each loop is
   written as one letter (H, B, I or M, see the switch below) and returns it in
   a freshly allocated buffer (Coarse). As a side effect it resets and refills
   the file-level loop_size, helix_size, loop_degree, loops, pairs and unpaired.
   NOTE(review): this listing omits lines between the statements shown. */
138 PUBLIC char *b2C(const char *structure )
143 char *string, *Coarse, *temp;
/* two arrays of strlen/3+1 shorts, both indexed by lp in the visible code;
   presumably a per-nesting-level stack - TODO confirm */
145 bulge = (short *) space(sizeof(short)*(strlen(structure)/3+1));
146 loop = (short *) space(sizeof(short)*(strlen(structure)/3+1));
/* output work buffer: four bytes per input character plus two spare */
147 temp = (char *) space(4*strlen(structure)+2);
/* clear the shared per-loop statistics before parsing */
149 for (i = 0; i < STRUC; i++) {
150 loop_size[i] = helix_size[i] = 0;
152 loop_degree[0]=0; /* open structure has degree 0 */
153 pairs = unpaired = loops = lp = 0;
/* preprocessed copy of the input produced by aux_struct() */
156 string = aux_struct( structure );
/* count one more element in the loop that is current at stack level lp */
163 loop_size[loop[lp]]++;
/* previous character was '(' (guarded against i==0): flag level lp */
167 if ((i>0)&&(string[i-1]=='(')) bulge[lp]=1;
/* register a new loop; it starts with degree 1 */
169 loop_degree[++loops]=1;
/* previous character was ']': flag level lp */
174 if (string[i-1]==']') bulge[lp]=1;
178 if (string[i-1]==']') bulge[lp]=1;
/* choose the letter from the degree of the current loop; the case labels
   for 'B' and 'I' are not part of this listing */
179 switch (loop_degree[loop[lp]]) {
180 case 1: temp[l++]='H'; break; /* hairpin */
183 temp[l++] = 'B'; /* bulge */
185 temp[l++] = 'I'; /* internal loop */
187 default: temp[l++] = 'M'; /* multiloop */
/* step back one stack level and add one branch to the loop found there */
192 loop_degree[loop[--lp]]++;
/* copy the assembled text into a result buffer with two spare bytes */
201 Coarse = (char *) space(sizeof(char)*(strlen(temp)+2));
202 strcpy(Coarse, temp);
204 free(bulge); free(loop);
208 /*---------------------------------------------------------------------------*/
/* b2Shapiro: derives a representation of `structure` in which each loop is
   written as a letter (H, B, I, M) followed by its size, each stack as
   "S<size>)", and the exterior as "E<size>)" when loop_size[0] is non-zero.
   The text is assembled in `temp` and copied into a fresh buffer (Shapiro).
   As a side effect it resets and refills the file-level loop_size, helix_size,
   loop_degree, loops, pairs and unpaired.
   NOTE(review): this listing omits lines between the statements shown. */
210 PUBLIC char *b2Shapiro(const char *structure )
/* tt is a 10-byte scratch buffer for one formatted token */
216 char *string, *Shapiro, *temp, tt[10];
/* two arrays of strlen/3+1 shorts, both indexed by lp in the visible code;
   presumably a per-nesting-level stack - TODO confirm */
218 bulge = (short *) space(sizeof(short)*(strlen(structure)/3+1));
219 loop = (short *) space(sizeof(short)*(strlen(structure)/3+1));
/* output work buffer: four bytes per input character plus three spare */
220 temp = (char *) space(4*strlen(structure)+3);
/* clear the shared per-loop statistics before parsing */
222 for (i = 0; i < STRUC; i++) {
223 loop_size[i] = helix_size[i] = 0;
225 loop_degree[0]=0; /* open structure has degree 0 */
226 pairs = unpaired = loops = lp = 0;
/* preprocessed copy of the input produced by aux_struct() */
229 string = aux_struct( structure );
232 temp[l++] = '('; /* root */
/* count one more element in the loop that is current at stack level lp */
237 loop_size[loop[lp]]++;
/* unlike b2C and parse_structure, this test accepts '[' as well as '(' as
   the previous character (guarded against i==0) */
242 if ((i>0)&&(string[i-1]=='(' || string[i-1]=='['))
/* register a new loop; it starts with degree 1 */
245 loop_degree[++loops]=1;
/* previous character was ']': flag level lp */
250 if (string[i-1]==']') bulge[lp]=1;
254 if (string[i-1]==']') bulge[lp]=1;
/* choose the letter from the degree of the current loop; the case labels
   for 'B' and 'I' are not part of this listing */
255 switch (loop_degree[loop[lp]]) {
256 case 1: temp[l++]='H'; break; /* hairpin */
259 temp[l++] = 'B'; /* bulge */
261 temp[l++] = 'I'; /* internal loop */
263 default: temp[l++] = 'M'; /* multiloop */
/* record the stack length of the current loop as p+1 */
265 helix_size[loop[lp]]=p+1;
/* append "<loop size>)" and then "S<helix size>)" to temp, one character at
   a time. NOTE(review): sprintf into tt[10] is unbounded, and strlen(tt) is
   re-evaluated on every iteration of the copy loops. */
267 sprintf(tt, "%d)" , loop_size[loop[lp]]);
268 for(k=0; k<strlen(tt); k++) temp[l++] = tt[k];
269 sprintf(tt, "S%d)" , helix_size[loop[lp]]);
270 for(k=0; k<strlen(tt); k++) temp[l++] = tt[k];
/* step back one stack level and add one branch to the loop found there */
274 loop_degree[loop[--lp]]++;
/* exterior token, formatted only when loop_size[0] is non-zero */
281 if (loop_size[0]) sprintf(tt, "E%d)" , loop_size[0]);
/* result buffer with two spare bytes; one branch copies temp at offset 1,
   leaving Shapiro[0] to be set by a line not shown here, the other copies it
   from offset 0 */
285 Shapiro = (char *) space(sizeof(char)*(strlen(temp)+2));
288 strcpy(Shapiro+1, temp);
289 } else strcpy(Shapiro, temp);
292 free(loop); free(bulge);
299 /*---------------------------------------------------------------------------*/
/* parse_structure: analyses `structure` purely for its side effects on the
   file-level loop_size, helix_size, loop_degree, loops, pairs and unpaired;
   the comment block below lists what each holds afterwards.
   NOTE(review): this listing omits lines between the statements shown. */
301 PUBLIC void parse_structure(const char *structure)
303 /*-----------------------------------------------------------------------------
305 upon return from parse_structure():
307 loops ....................... number of loops or stacks in structure.
308 loop_size[1 <= i <= loops] ..... size of i-th loop.
309 loop_size[0] ................... number of external digits.
310 loop_degree[1 <= i <= loops] ... degree (branches) of i-th loop.
311 loop_degree[0] ................. number of components.
312 helix_size[1 <= i <= loops] .... size of i-th stack.
313 unpaired ....................... n of unpaired digits.
314 pairs .......................... n of base pairs.
316 -----------------------------------------------------------------------------*/
/* work buffer: four bytes per input character plus two spare */
324 temp = (char *) space(strlen(structure)*4+2);
/* two arrays of strlen/3+1 shorts, both indexed by lp in the visible code;
   presumably a per-nesting-level stack - TODO confirm */
325 bulge = (short *) space(sizeof(short)*(strlen(structure)/3+1));
326 loop = (short *) space(sizeof(short)*(strlen(structure)/3+1));
/* clear the shared per-loop statistics before parsing */
328 for (i = 0; i < STRUC; i++) {
329 loop_size[i] = helix_size[i] = 0;
/* here loop[0] is also set to 0 explicitly, which b2C and b2Shapiro do not
   do in their visible lines */
331 loop[0] = loop_degree[0]=0; /* open structure has degree 0 */
332 pairs = unpaired = loops = lp = 0;
/* preprocessed copy of the input produced by aux_struct() */
335 string = aux_struct(structure);
/* count one more element in the loop that is current at stack level lp */
342 loop_size[loop[lp]]++;
/* previous character was '(' (guarded against i==0): flag level lp */
345 if ((i>0)&&(string[i-1]=='(')) bulge[lp]=1;
/* register a new loop; it starts with degree 1 */
347 loop_degree[++loops]=1;
/* previous character was ']': flag level lp */
352 if (string[i-1]==']') bulge[lp]=1;
356 if (string[i-1]==']') bulge[lp]=1;
/* record the stack length of the current loop as p+1 */
357 helix_size[loop[lp]]=p+1;
/* step back one stack level and add one branch to the loop found there */
360 loop_degree[loop[--lp]]++;
366 free(bulge); free(loop);
370 /*---------------------------------------------------------------------------*/
/* add_root: builds, in a new buffer xS, a string that contains `structure`
   with room for three additional characters plus the terminating NUL.
   NOTE(review): this listing omits lines between the statements shown. */
372 PUBLIC char *add_root(const char *structure)
375 xS = (char *) space(sizeof(char)*(strlen(structure)+4));
/* strcat needs xS to hold a terminated string already; that initialisation
   is not visible here (presumably a preceding line or zeroed memory from
   space() - TODO confirm) */
377 strcat(xS,structure);
383 /*---------------------------------------------------------------------------*/
/* expand_Shapiro: copies `structure` into a work buffer while inserting extra
   characters (an additional '(' after every '('; the handling of ')' is not
   shown), then sizes the result buffer xS from the expanded text.
   NOTE(review): this listing omits lines between the statements shown. */
385 PUBLIC char *expand_Shapiro(const char *structure)
/* work buffer: four bytes per input character plus two spare */
390 temp = (char *) space(4*strlen(structure)+2);
/* stops before the last character of the input.
   NOTE(review): strlen() yields an unsigned size_t, so for an empty string
   strlen(structure)-1 wraps to SIZE_MAX and the loop is entered - confirm
   that callers never pass "". */
395 while (i<strlen(structure)-1) {
/* copy the character, then double every opening bracket */
396 temp[l++] = structure[i];
397 if (structure[i] == '(') temp[l++] = '(';
398 else if (structure[i] == ')') {
/* result buffer: expanded text plus the terminating NUL */
407 xS = (char *) space(sizeof(char)*(strlen(temp)+1));
413 /*---------------------------------------------------------------------------*/
/* expand_Full: walks `structure` to its terminating NUL and writes an
   expanded form into a work buffer; '(' is copied through, the handling of
   ')' and of other characters is not shown. The result buffer xF is then
   sized from the number of characters written (l).
   NOTE(review): this listing omits lines between the statements shown. */
415 PUBLIC char *expand_Full(const char *structure)
/* work buffer: four bytes per input character plus two spare */
420 temp = (char *) space(4*strlen(structure)+2);
424 while (structure[i]) {
425 if (structure[i] == '(') temp[l++] = '(';
426 else if (structure[i] == ')') {
/* l characters produced so far plus five spare bytes */
439 xF = (char *) space(sizeof(char)*(l+5));
447 /*---------------------------------------------------------------------------*/
/* unexpand_Full: converts `structure` into a string of '(', '.' and ')'
   characters. Output is accumulated in `temp` in reverse order and flipped
   into the result buffer `full` at the end. Numeric weights found in the
   input are collected in `id` and parsed with sscanf to obtain repeat counts.
   NOTE(review): this listing omits lines between the statements shown. */
449 PUBLIC char *unexpand_Full(const char *structure)
/* id is a 10-byte buffer for the digits of one weight */
452 char id[10], *full, *temp;
453 int i, j, k, l, o, w;
/* work buffer: four bytes per input character plus two spare */
455 temp = (char *) space(4*strlen(structure)+2);
/* strlen/2+1 shorts indexed by o; used below as per-level repeat counts */
456 match_paren = (short *) space(sizeof(short)*(strlen(structure)/2+1));
/* start at the last character; presumably the scan runs backwards from
   here - TODO confirm against the loop header, which is not shown */
458 i = strlen(structure)-1;
462 switch (structure[i]) {
/* emit match_paren[o] opening brackets, then clear that entry and drop
   one level */
464 for (j=0; j<match_paren[o]; j++) temp[l++]='(';
465 match_paren[o--] = 0;
/* parse the collected weight and emit that many '.'.
   NOTE(review): the sscanf result is unchecked; if no number is parsed, w
   keeps its previous value. */
469 sscanf(id+k, "%d", &w);
470 for (j=0; j<w; j++) temp[l++]='.';
/* parse the collected weight and emit that many ')' */
475 sscanf(id+k, "%d", &w);
476 for (j=0; j<w; j++) temp[l++]=')';
/* prepend the current character to the text held in id (k moves left).
   NOTE(review): no lower bound on k is visible; more than nine characters
   in a row would write before id[0] - confirm. */
486 id[--k]=structure[i];
/* reverse the l characters accumulated in temp into the result buffer */
492 full = (char *) space(sizeof(char)*(l+1));
493 for (i=0; i<l; i++) full[i]=temp[l-i-1];
501 /*---------------------------------------------------------------------------*/
/* unweight: copies `structure` into a work buffer with every decimal digit
   left out, then sizes the result buffer `full` from the number of
   characters kept (l).
   NOTE(review): this listing omits lines between the statements shown. */
503 PUBLIC char *unweight(const char *structure)
/* work buffer: four bytes per input character plus one spare */
508 temp = (char *) space(4*strlen(structure)+1);
511 while (structure[i]) {
/* keep only non-digit characters.
   NOTE(review): isdigit() is only defined for values representable as
   unsigned char (or EOF); where plain char is signed, a byte >= 0x80
   becomes negative after the (int) cast - cast through unsigned char. */
512 if (!isdigit((int)structure[i])) temp[l++]=structure[i];
/* result buffer: kept characters plus the terminating NUL */
516 full = (char *) space(sizeof(char)*(l+1));
522 /*---------------------------------------------------------------------------*/
/* unexpand_aligned_F: rewrites the two strings align[0] and align[1] in
   place. Both are scanned with a shared index i, the converted text is built
   in the temporaries t0 and t1 with a shared write index l, and finally
   copied back over align[0] and align[1].
   NOTE(review): this listing omits lines between the statements shown and may
   end before the function does. */
524 PUBLIC void unexpand_aligned_F(char *align[2])
/* NOTE(review): both temporaries are sized from align[0]; presumably the two
   strings always have equal length - confirm, otherwise t1 can be too small */
529 t0 = (char *) space(strlen(align[0])+1);
530 t1 = (char *) space(strlen(align[0])+1);
/* strlen(align[0]) is re-evaluated on every iteration */
532 for (i=0, l=0; i<strlen(align[0]); i++) {
533 switch (align[0][i]) {
540 switch (align[1][i]) {
/* overwrite the previously written column and write the current one:
   t0 receives "_." and t1 receives "()" */
549 t0[l-1]='_'; t0[l]='.';
550 t1[l-1]='('; t1[l]=')'; l++;
/* skip ahead to the next ')' in align[0].
   NOTE(review): no check for the terminating NUL - relies on a ')' being
   present at or after position i. */
552 while (align[0][i]!=')') i++;
555 switch (align[1][i]) {
/* same skip, driven by align[1]; same missing NUL check */
562 while (align[1][i]!=')') i++;
/* 'U' in align[1]: t1 receives "_." over the previous and current column,
   t0 receives ')' */
568 if (align[1][i]=='U') {
569 t1[l-1]='_'; t1[l]='.'; t0[l++]=')';
570 while (align[0][i]!=')') i++;
/* terminate both temporaries on top of the last written column, then copy
   them back without their first character */
575 t0[l-1]=t1[l-1]='\0';
576 strcpy(align[0], t0+1);
577 strcpy(align[1], t1+1);