1 /* Last changed Time-stamp: <2006-04-05 12:58:49 ivo> */
3 c Ivo L Hofacker, Vienna RNA package
15 #include "part_func_co.h"
16 #include "part_func.h"
17 #include "fold_vars.h"
19 #include "read_epars.h"
21 #include "RNAcofold_cmdl.h"
24 PRIVATE char rcsid[] = "$Id: RNAcofold.c,v 1.7 2006/05/10 15:14:27 ivo Exp $";
26 PRIVATE char *costring(char *string);
27 PRIVATE char *tokenize(char *line);
28 PRIVATE cofoldF do_partfunc(char *string, int length, int Switch, struct plist **tpr, struct plist **mf, pf_paramT *parameters);
29 PRIVATE double *read_concentrations(FILE *fp);
30 PRIVATE void do_concentrations(double FEAB, double FEAA, double FEBB, double FEA, double FEB, double *startconces);
32 PRIVATE double bppmThreshold;
34 /*--------------------------------------------------------------------------*/
36 int main(int argc, char *argv[])
38 struct RNAcofold_args_info args_info;
39 unsigned int input_type;
40 char *string, *input_string;
41 char *structure, *cstruc, *rec_sequence, *orig_sequence, *rec_id, **rec_rest;
42 char fname[FILENAME_MAX_LENGTH], ffname[FILENAME_MAX_LENGTH];
46 int i, length, l, sym, r, cl;
48 double kT, sfact, betaScale;
51 int doT; /*compute dimere free energies etc.*/
52 int doC; /*toggle to compute concentrations*/
53 int doQ; /*toggle to compute prob of base being paired*/
54 int cofi; /*toggle concentrations stdin / file*/
56 plist *prAA; /*pair probabilities of AA dimer*/
61 plist *mfAA; /*pair mfobabilities of AA dimer*/
66 unsigned int rec_type, read_opt;
67 pf_paramT *pf_parameters;
72 #############################################
73 # init variables and parameter options
74 #############################################
96 rec_type = read_opt = 0;
97 rec_id = rec_sequence = orig_sequence = NULL;
100 set_model_details(&md);
102 #############################################
103 # check the command line prameters
104 #############################################
106 if(RNAcofold_cmdline_parser (argc, argv, &args_info) != 0) exit(1);
108 if(args_info.temp_given) temperature = args_info.temp_arg;
109 /* structure constraint */
110 if(args_info.constraint_given) fold_constrained=1;
111 /* do not take special tetra loop energies into account */
112 if(args_info.noTetra_given) md.special_hp = tetra_loop=0;
113 /* set dangle model */
114 if(args_info.dangles_given){
115 if((args_info.dangles_arg < 0) || (args_info.dangles_arg > 3))
116 warn_user("required dangle model not implemented, falling back to default dangles=2");
118 md.dangles = dangles = args_info.dangles_arg;
120 /* do not allow weak pairs */
121 if(args_info.noLP_given) md.noLP = noLonelyPairs = 1;
122 /* do not allow wobble pairs (GU) */
123 if(args_info.noGU_given) md.noGU = noGU = 1;
124 /* do not allow weak closing pairs (AU,GU) */
125 if(args_info.noClosingGU_given) md.noGUclosure = no_closingGU = 1;
126 /* gquadruplex support */
127 if(args_info.gquad_given) md.gquad = gquad = 1;
128 /* do not convert DNA nucleotide "T" to appropriate RNA "U" */
129 if(args_info.noconv_given) noconv = 1;
130 /* set energy model */
131 if(args_info.energyModel_given) energy_set = args_info.energyModel_arg;
133 if(args_info.noPS_given) noPS = 1;
134 /* take another energy parameter set */
135 if(args_info.paramFile_given) ParamFile = strdup(args_info.paramFile_arg);
136 /* Allow other pairs in addition to the usual AU,GC,and GU pairs */
137 if(args_info.nsp_given) ns_bases = strdup(args_info.nsp_arg);
138 /* set pf scaling factor */
139 if(args_info.pfScale_given) sfact = args_info.pfScale_arg;
141 if(args_info.all_pf_given) doT = pf = 1;
142 /* concentrations from stdin */
143 if(args_info.concentrations_given) doC = doT = pf = 1;
144 /* set the bppm threshold for the dotplot */
145 if(args_info.bppmThreshold_given)
146 bppmThreshold = MIN2(1., MAX2(0.,args_info.bppmThreshold_arg));
147 /* concentrations in file */
148 if(args_info.betaScale_given) betaScale = args_info.betaScale_arg;
149 if(args_info.concfile_given){
150 Concfile = strdup(args_info.concfile_arg);
151 doC = cofi = doT = pf = 1;
153 /* partition function settings */
154 if(args_info.partfunc_given){
156 if(args_info.partfunc_arg != -1)
157 do_backtrack = args_info.partfunc_arg;
159 /* free allocated memory of command line data structure */
160 RNAcofold_cmdline_parser_free (&args_info);
164 #############################################
166 #############################################
169 nrerror("G-Quadruplex support is currently not available for partition function computations");
172 if (ParamFile != NULL)
173 read_parameter_file(ParamFile);
175 if (ns_bases != NULL) {
176 nonstandards = space(33);
184 nonstandards[i++]=*c++;
185 nonstandards[i++]=*c;
186 if ((sym)&&(*c!=*(c-1))) {
187 nonstandards[i++]=*c;
188 nonstandards[i++]=*(c-1);
194 istty = isatty(fileno(stdout))&&isatty(fileno(stdin));
196 /* print user help if we get input from tty */
198 printf("Use '&' to connect 2 sequences that shall form a complex.\n");
199 if(fold_constrained){
200 print_tty_constraint(VRNA_CONSTRAINT_DOT | VRNA_CONSTRAINT_X | VRNA_CONSTRAINT_ANG_BRACK | VRNA_CONSTRAINT_RND_BRACK);
201 print_tty_input_seq_str("Input sequence (upper or lower case) followed by structure constraint\n");
203 else print_tty_input_seq();
206 /* set options we wanna pass to read_record */
207 if(istty) read_opt |= VRNA_INPUT_NOSKIP_BLANK_LINES;
208 if(!fold_constrained) read_opt |= VRNA_INPUT_NO_REST;
211 #############################################
212 # main loop: continue until end of file
213 #############################################
216 !((rec_type = read_record(&rec_id, &rec_sequence, &rec_rest, read_opt))
217 & (VRNA_INPUT_ERROR | VRNA_INPUT_QUIT))){
220 ########################################################
221 # init everything according to the data we've read
222 ########################################################
225 if(!istty) printf("%s\n", rec_id);
226 (void) sscanf(rec_id, ">%" XSTR(FILENAME_ID_LENGTH) "s", fname);
228 else fname[0] = '\0';
232 rec_sequence = tokenize(rec_sequence); /* frees input_string and sets cut_point */
233 length = (int) strlen(rec_sequence);
234 structure = (char *) space((unsigned) length+1);
236 /* parse the rest of the current dataset to obtain a structure constraint */
237 if(fold_constrained){
240 unsigned int coptions = (rec_id) ? VRNA_CONSTRAINT_MULTILINE : 0;
241 coptions |= VRNA_CONSTRAINT_DOT | VRNA_CONSTRAINT_X | VRNA_CONSTRAINT_ANG_BRACK | VRNA_CONSTRAINT_RND_BRACK;
242 getConstraint(&cstruc, (const char **)rec_rest, coptions);
243 cstruc = tokenize(cstruc);
244 if(cut_point != cp) nrerror("cut point in sequence and structure constraint differs");
245 cl = (cstruc) ? (int)strlen(cstruc) : 0;
247 if(cl == 0) warn_user("structure constraint is missing");
248 else if(cl < length) warn_user("structure constraint is shorter than sequence");
249 else if(cl > length) nrerror("structure constraint is too long");
251 if(cstruc) strncpy(structure, cstruc, sizeof(char)*(cl+1));
254 /* convert DNA alphabet to RNA if not explicitely switched off */
255 if(!noconv) str_DNA2RNA(rec_sequence);
256 /* store case-unmodified sequence */
257 orig_sequence = strdup(rec_sequence);
258 /* convert sequence to uppercase letters only */
259 str_uppercase(rec_sequence);
263 printf("length = %d\n", length);
265 printf("length1 = %d\nlength2 = %d\n", cut_point-1, length-cut_point+1);
269 ########################################################
270 # begin actual computations
271 ########################################################
276 if (cofi) { /* read from file */
277 fp = fopen(Concfile, "r");
279 fprintf(stderr, "could not open concentration file %s", Concfile);
282 ConcAandB = read_concentrations(fp);
285 printf("Please enter concentrations [mol/l]\n format: ConcA ConcB\n return to end\n");
286 ConcAandB = read_concentrations(stdin);
289 /*compute mfe of AB dimer*/
290 min_en = cofold(rec_sequence, structure);
291 assign_plist_from_db(&mfAB, structure, 0.95);
294 char *pstring, *pstruct;
295 if (cut_point == -1) {
296 pstring = strdup(orig_sequence);
297 pstruct = strdup(structure);
299 pstring = costring(orig_sequence);
300 pstruct = costring(structure);
302 printf("%s\n%s", pstring, pstruct);
304 printf("\n minimum free energy = %6.2f kcal/mol\n", min_en);
306 printf(" (%6.2f)\n", min_en);
308 (void) fflush(stdout);
311 char annot[512] = "";
312 if (fname[0]!='\0') {
313 strcpy(ffname, fname);
314 strcat(ffname, "_ss.ps");
316 strcpy(ffname, "rna.ps");
320 "1 %d 9 0 0.9 0.2 omark\n%d %d 9 1 0.1 0.2 omark\n",
321 cut_point-1, cut_point+1, length+1);
323 if (!noPS) (void) PS_rna_plot_a_gquad(pstring, pstruct, ffname, annot, NULL);
325 if (!noPS) (void) PS_rna_plot_a(pstring, pstruct, ffname, annot, NULL);
332 if (length>2000) free_co_arrays();
334 /*compute partition function*/
339 dangles=2; /* recompute with dangles as in pf_fold() */
340 min_en = energy_of_structure(rec_sequence, structure, 0);
344 kT = (betaScale*((temperature+K0)*GASCONST))/1000.; /* in Kcal */
345 pf_scale = exp(-(sfact*min_en)/kT/length);
346 if (length>2000) fprintf(stderr, "scaling factor %f\n", pf_scale);
348 pf_parameters = get_boltzmann_factors(temperature, betaScale, md, pf_scale);
351 strncpy(structure, cstruc, length+1);
352 AB = co_pf_fold_par(rec_sequence, structure, pf_parameters, do_backtrack, fold_constrained);
356 costruc = (char *) space(sizeof(char)*(strlen(structure)+2));
357 if (cut_point<0) printf("%s", structure);
359 strncpy(costruc, structure, cut_point-1);
360 strcat(costruc, "&");
361 strcat(costruc, structure+cut_point-1);
362 printf("%s", costruc);
364 if (!istty) printf(" [%6.2f]\n", AB.FAB);
365 else printf("\n");/*8.6.04*/
367 if ((istty)||(!do_backtrack))
368 printf(" free energy of ensemble = %6.2f kcal/mol\n", AB.FAB);
369 printf(" frequency of mfe structure in ensemble %g",
370 exp((AB.FAB-min_en)/kT));
372 printf(" , delta G binding=%6.2f\n", AB.FcAB - AB.FA - AB.FB);
374 probs = export_co_bppm();
375 assign_plist_from_pr(&prAB, probs, length, bppmThreshold);
377 /* if (doQ) make_probsum(length,fname); */ /*compute prob of base paired*/
378 /* free_co_arrays(); */
379 if (doT) { /* cofold of all dimers, monomers */
380 int Blength, Alength;
381 char *Astring, *Bstring, *orig_Astring, *orig_Bstring;
386 printf("Sorry, i cannot do that with only one molecule, please give me two or leave it\n");
391 if (dangles==1) dangles=2;
392 Alength=cut_point-1; /*length of first molecule*/
393 Blength=length-cut_point+1; /*length of 2nd molecule*/
395 Astring=(char *)space(sizeof(char)*(Alength+1));/*Sequence of first molecule*/
396 Bstring=(char *)space(sizeof(char)*(Blength+1));/*Sequence of second molecule*/
397 strncat(Astring,rec_sequence,Alength);
398 strncat(Bstring,rec_sequence+Alength,Blength);
400 orig_Astring=(char *)space(sizeof(char)*(Alength+1));/*Sequence of first molecule*/
401 orig_Bstring=(char *)space(sizeof(char)*(Blength+1));/*Sequence of second molecule*/
402 strncat(orig_Astring,orig_sequence,Alength);
403 strncat(orig_Bstring,orig_sequence+Alength,Blength);
405 /* compute AA dimer */
406 AA=do_partfunc(Astring, Alength, 2, &prAA, &mfAA, pf_parameters);
407 /* compute BB dimer */
408 BB=do_partfunc(Bstring, Blength, 2, &prBB, &mfBB, pf_parameters);
409 /*free_co_pf_arrays();*/
411 /* compute A monomer */
412 do_partfunc(Astring, Alength, 1, &prA, &mfA, pf_parameters);
414 /* compute B monomer */
415 do_partfunc(Bstring, Blength, 1, &prB, &mfB, pf_parameters);
417 compute_probabilities(AB.F0AB, AB.FA, AB.FB, prAB, prA, prB, Alength);
418 compute_probabilities(AA.F0AB, AA.FA, AA.FA, prAA, prA, prA, Alength);
419 compute_probabilities(BB.F0AB, BB.FA, BB.FA, prBB, prA, prB, Blength);
420 printf("Free Energies:\nAB\t\tAA\t\tBB\t\tA\t\tB\n%.6f\t%6f\t%6f\t%6f\t%6f\n",
421 AB.FcAB, AA.FcAB, BB.FcAB, AB.FA, AB.FB);
424 do_concentrations(AB.FcAB, AA.FcAB, BB.FcAB, AB.FA, AB.FB, ConcAandB);
425 free(ConcAandB);/*freeen*/
428 if (fname[0]!='\0') {
429 strcpy(ffname, fname);
430 strcat(ffname, "_dp5.ps");
431 } else strcpy(ffname, "dot5.ps");
432 /*output of the 5 dot plots*/
435 /*write Free Energy into comment*/
436 sprintf(comment,"\n%%Heterodimer AB FreeEnergy= %.9f\n", AB.FcAB);
440 strcpy(Newname,"AB");
441 strcat(Newname,ffname);
442 (void)PS_dot_plot_list(orig_sequence, Newname, prAB, mfAB, comment);
445 sprintf(comment,"\n%%Homodimer AA FreeEnergy= %.9f\n",AA.FcAB);
447 strcpy(Newname,"AA");
448 strcat(Newname,ffname);
449 /*write AA sequence*/
450 Newstring=(char*)space((2*Alength+1)*sizeof(char));
451 strcpy(Newstring,orig_Astring);
452 strcat(Newstring,orig_Astring);
453 (void)PS_dot_plot_list(Newstring, Newname, prAA, mfAA, comment);
457 sprintf(comment,"\n%%Homodimer BB FreeEnergy= %.9f\n",BB.FcAB);
459 strcpy(Newname,"BB");
460 strcat(Newname,ffname);
461 /*write BB sequence*/
462 Newstring=(char*)space((2*Blength+1)*sizeof(char));
463 strcpy(Newstring,orig_Bstring);
464 strcat(Newstring,orig_Bstring);
467 (void)PS_dot_plot_list(Newstring, Newname, prBB, mfBB, comment);
473 sprintf(comment,"\n%%Monomer A FreeEnergy= %.9f\n",AB.FA);
476 strcat(Newname,ffname);
477 /*write BB sequence*/
478 (void)PS_dot_plot_list(orig_Astring, Newname, prA, mfA, comment);
480 /*B monomer dot plot*/
481 sprintf(comment,"\n%%Monomer B FreeEnergy= %.9f\n",AB.FB);
484 strcat(Newname,ffname);
485 /*write BB sequence*/
486 (void)PS_dot_plot_list(orig_Bstring, Newname, prB, mfB, comment);
487 free(Astring); free(Bstring); free(orig_Astring); free(orig_Bstring);
488 free(prAB); free(prAA); free(prBB); free(prA); free(prB);
489 free(mfAB); free(mfAA); free(mfBB); free(mfA); free(mfB);
498 if (fname[0]!='\0') {
499 strcpy(ffname, fname);
500 strcat(ffname, "_dp.ps");
501 } else strcpy(ffname, "dot.ps");
504 if (pf) { (void) PS_dot_plot_list(rec_sequence, ffname, prAB, mfAB, "doof");
509 if (!doT) free_co_pf_arrays();
511 (void) fflush(stdout);
514 if(cstruc) free(cstruc);
515 if(rec_id) free(rec_id);
519 /* free the rest of current dataset */
521 for(i=0;rec_rest[i];i++) free(rec_rest[i]);
524 rec_id = rec_sequence = orig_sequence = structure = cstruc = NULL;
527 /* print user help for the next round if we get input from tty */
529 printf("Use '&' to connect 2 sequences that shall form a complex.\n");
530 if(fold_constrained){
531 print_tty_constraint(VRNA_CONSTRAINT_DOT | VRNA_CONSTRAINT_X | VRNA_CONSTRAINT_ANG_BRACK | VRNA_CONSTRAINT_RND_BRACK);
532 print_tty_input_seq_str("Input sequence (upper or lower case) followed by structure constraint\n");
534 else print_tty_input_seq();
540 PRIVATE char *tokenize(char *line)
545 copy = (char *) space(strlen(line)+1);
546 (void) sscanf(line, "%s", copy);
547 pos = strchr(copy, '&');
549 cut = (int) (pos-copy)+1;
550 if (cut >= strlen(copy)) cut = -1;
551 if (strchr(pos+1, '&')) nrerror("more than one cut-point in input");
552 for (;*pos;pos++) *pos = *(pos+1); /* splice out the & */
555 if (cut_point==-1) cut_point = cut;
556 else if (cut_point != cut) {
557 fprintf(stderr,"cut_point = %d cut = %d\n", cut_point, cut);
558 nrerror("Sequence and Structure have different cut points.");
565 PRIVATE char *costring(char *string)
570 len = strlen(string);
571 ctmp = (char *)space((len+2) * sizeof(char));
573 (void) strncpy(ctmp, string, cut_point-1);
575 ctmp[cut_point-1] = '&';
576 /* second sequence */
577 (void) strcat(ctmp, string+cut_point-1);
582 PRIVATE cofoldF do_partfunc(char *string, int length, int Switch, struct plist **tpr, struct plist **mfpl, pf_paramT *parameters){
583 /*compute mfe and partition function of dimere or monomer*/
592 kT = parameters->kT/1000.;
597 tempstruc = (char *) space((unsigned)length+1);
598 min_en = fold(string, tempstruc);
599 assign_plist_from_db(mfpl, tempstruc, 0.95);
601 /*En=pf_fold(string, tempstruc);*/
602 /* init_co_pf_fold(length); <- obsolete */
603 par = get_boltzmann_factor_copy(parameters);
604 par->pf_scale = exp(-(sfact*min_en)/kT/(length));
605 X=co_pf_fold_par(string, tempstruc, par, 1, fold_constrained);
606 probs = export_co_bppm();
607 assign_plist_from_pr(tpr, probs, length, bppmThreshold);
614 Newstring=(char *)space(sizeof(char)*(length*2+1));
615 strcat(Newstring,string);
616 strcat(Newstring,string);
618 tempstruc = (char *) space((unsigned)length*2+1);
619 min_en = cofold(Newstring, tempstruc);
620 assign_plist_from_db(mfpl, tempstruc, 0.95);
622 /* init_co_pf_fold(2*length); <- obsolete */
623 par = get_boltzmann_factor_copy(parameters);
624 par->pf_scale =exp(-(sfact*min_en)/kT/(2*length));
625 X=co_pf_fold_par(Newstring, tempstruc, par, 1, fold_constrained);
626 probs = export_co_bppm();
627 assign_plist_from_pr(tpr, probs, 2*length, bppmThreshold);
635 printf("Error in get_partfunc\n, computing neither mono- nor dimere!\n");
643 PRIVATE void do_concentrations(double FEAB, double FEAA, double FEBB, double FEA, double FEB, double *startconc) {
644 /* compute and print concentrations out of free energies, calls get_concentrations */
645 struct ConcEnt *result;
648 result=get_concentrations(FEAB, FEAA, FEBB, FEA, FEB, startconc);
650 printf("Initial concentrations\t\trelative Equilibrium concentrations\n");
651 printf("A\t\t B\t\t AB\t\t AA\t\t BB\t\t A\t\t B\n");
652 for (n=0; (startconc[2*n]>0) || (startconc[2*n+1]>0); n++); /* count */
653 for (i=0; i<n ;i++) {
654 double tot = result[i].A0+result[i].B0;
655 printf("%-10g\t%-10g\t%.5f \t%.5f \t%.5f \t%.5f \t%.5f\n", result[i].A0, result[i].B0,
656 result[i].ABc/tot, result[i].AAc/tot, result[i].BBc/tot ,result[i].Ac/tot, result[i].Bc/tot);
662 PRIVATE double *read_concentrations(FILE *fp) {
663 /* reads concentrations, returns list of double, -1. marks end */
668 startc = (double *) space((2*n+1)*sizeof(double));
670 while ((line=get_line(fp))!=NULL) {
674 startc=(double *)xrealloc(startc,(2*n+1)*sizeof(double));
676 c = sscanf(line,"%lf %lf", &startc[i], &startc[i+1]);
681 startc[i]=startc[i+1]=0;