1 /* Last changed Time-stamp: <2012-02-15 18:20:49 ivo> */
3 Ineractive Access to folding Routines
10 *** \brief RNAfold program source code
12 *** This code provides an interface for MFE and Partition function folding
13 *** of single linear or circular RNA molecules.
23 #include "part_func.h"
24 #include "fold_vars.h"
27 #include "read_epars.h"
30 #include "RNAfold_cmdl.h"
35 static char UNUSED rcsid[] = "$Id: RNAfold.c,v 1.25 2009/02/24 14:22:21 ivo Exp $";
37 /*--------------------------------------------------------------------------*/
39 int main(int argc, char *argv[]){
40 struct RNAfold_args_info args_info;
41 char *buf, *rec_sequence, *rec_id, **rec_rest, *structure, *cstruc, *orig_sequence;
42 char fname[FILENAME_MAX_LENGTH], ffname[FILENAME_MAX_LENGTH], *ParamFile;
44 int i, length, l, cl, sym, r, istty, pf, noPS, noconv, fasta;
45 unsigned int rec_type, read_opt;
46 double energy, min_en, kT, sfact;
47 int doMEA, circular, lucky;
48 double MEAgamma, bppmThreshold, betaScale;
49 paramT *mfe_parameters;
50 pf_paramT *pf_parameters;
53 rec_type = read_opt = 0;
54 rec_id = buf = rec_sequence = structure = cstruc = orig_sequence = NULL;
75 set_model_details(&md);
79 #############################################
80 # check the command line parameters
81 #############################################
83 if(RNAfold_cmdline_parser (argc, argv, &args_info) != 0) exit(1);
85 if(args_info.temp_given) temperature = args_info.temp_arg;
86 /* structure constraint */
87 if(args_info.constraint_given) fold_constrained=1;
88 /* do not take special tetra loop energies into account */
89 if(args_info.noTetra_given) md.special_hp = tetra_loop=0;
90 /* set dangle model */
91 if(args_info.dangles_given){
92 if((args_info.dangles_arg < 0) || (args_info.dangles_arg > 3))
93 warn_user("required dangle model not implemented, falling back to default dangles=2");
95 md.dangles = dangles = args_info.dangles_arg;
97 /* do not allow weak pairs */
98 if(args_info.noLP_given) md.noLP = noLonelyPairs = 1;
99 /* do not allow wobble pairs (GU) */
100 if(args_info.noGU_given) md.noGU = noGU = 1;
101 /* do not allow weak closing pairs (AU,GU) */
102 if(args_info.noClosingGU_given) md.noGUclosure = no_closingGU = 1;
103 /* gquadruplex support */
104 if(args_info.gquad_given) md.gquad = gquad = 1;
105 /* do not convert DNA nucleotide "T" to appropriate RNA "U" */
106 if(args_info.noconv_given) noconv = 1;
107 /* set energy model */
108 if(args_info.energyModel_given) energy_set = args_info.energyModel_arg;
109 /* take another energy parameter set */
110 if(args_info.paramFile_given) ParamFile = strdup(args_info.paramFile_arg);
111 /* Allow other pairs in addition to the usual AU,GC,and GU pairs */
112 if(args_info.nsp_given) ns_bases = strdup(args_info.nsp_arg);
113 /* set pf scaling factor */
114 if(args_info.pfScale_given) sfact = args_info.pfScale_arg;
115 /* assume RNA sequence to be circular */
116 if(args_info.circ_given) circular=1;
117 /* always look on the bright side of life */
118 if(args_info.ImFeelingLucky_given) lucky = pf = st_back = 1;
119 /* set the bppm threshold for the dotplot */
120 if(args_info.bppmThreshold_given)
121 bppmThreshold = MIN2(1., MAX2(0.,args_info.bppmThreshold_arg));
122 if(args_info.betaScale_given) betaScale = args_info.betaScale_arg;
123 /* do not produce postscript output */
124 if(args_info.noPS_given) noPS=1;
125 /* partition function settings */
126 if(args_info.partfunc_given){
128 if(args_info.partfunc_arg != 1)
129 do_backtrack = args_info.partfunc_arg;
131 /* MEA (maximum expected accuracy) settings */
132 if(args_info.MEA_given){
134 if(args_info.MEA_arg != -1)
135 MEAgamma = args_info.MEA_arg;
138 /* free allocated memory of command line data structure */
139 RNAfold_cmdline_parser_free (&args_info);
141 mfe_parameters = get_scaled_parameters(temperature, md);
144 #############################################
146 #############################################
148 if(circular && gquad){
149 nrerror("G-Quadruplex support is currently not available for circular RNA structures");
152 if (ParamFile != NULL)
153 read_parameter_file(ParamFile);
155 if (circular && noLonelyPairs)
156 warn_user("depending on the origin of the circular sequence, some structures may be missed when using -noLP\nTry rotating your sequence a few times");
158 if (ns_bases != NULL) {
159 nonstandards = space(33);
167 nonstandards[i++]=*c++;
168 nonstandards[i++]=*c;
169 if ((sym)&&(*c!=*(c-1))) {
170 nonstandards[i++]=*c;
171 nonstandards[i++]=*(c-1);
178 istty = isatty(fileno(stdout))&&isatty(fileno(stdin));
180 /* print user help if we get input from tty */
182 if(fold_constrained){
183 print_tty_constraint_full();
184 print_tty_input_seq_str("Input sequence (upper or lower case) followed by structure constraint");
186 else print_tty_input_seq();
189 /* set options we wanna pass to read_record */
190 if(istty) read_opt |= VRNA_INPUT_NOSKIP_BLANK_LINES;
191 if(!fold_constrained) read_opt |= VRNA_INPUT_NO_REST;
194 #############################################
195 # main loop: continue until end of file
196 #############################################
199 !((rec_type = read_record(&rec_id, &rec_sequence, &rec_rest, read_opt))
200 & (VRNA_INPUT_ERROR | VRNA_INPUT_QUIT))){
203 ########################################################
204 # init everything according to the data we've read
205 ########################################################
208 if(!istty) printf("%s\n", rec_id);
209 (void) sscanf(rec_id, ">%" XSTR(FILENAME_ID_LENGTH) "s", fname);
211 else fname[0] = '\0';
213 length = (int)strlen(rec_sequence);
214 structure = (char *)space(sizeof(char) *(length+1));
216 /* parse the rest of the current dataset to obtain a structure constraint */
217 if(fold_constrained){
219 unsigned int coptions = (rec_id) ? VRNA_CONSTRAINT_MULTILINE : 0;
220 coptions |= VRNA_CONSTRAINT_ALL;
221 getConstraint(&cstruc, (const char **)rec_rest, coptions);
222 cl = (cstruc) ? (int)strlen(cstruc) : 0;
224 if(cl == 0) warn_user("structure constraint is missing");
225 else if(cl < length) warn_user("structure constraint is shorter than sequence");
226 else if(cl > length) nrerror("structure constraint is too long");
227 if(cstruc) strncpy(structure, cstruc, sizeof(char)*(cl+1));
230 /* convert DNA alphabet to RNA if not explicitely switched off */
231 if(!noconv) str_DNA2RNA(rec_sequence);
232 /* store case-unmodified sequence */
233 orig_sequence = strdup(rec_sequence);
234 /* convert sequence to uppercase letters only */
235 str_uppercase(rec_sequence);
237 if(istty) printf("length = %d\n", length);
240 ########################################################
241 # begin actual computations
242 ########################################################
244 min_en = fold_par(rec_sequence, structure, mfe_parameters, fold_constrained, circular);
247 printf("%s\n%s", orig_sequence, structure);
249 printf("\n minimum free energy = %6.2f kcal/mol\n", min_en);
251 printf(" (%6.2f)\n", min_en);
252 (void) fflush(stdout);
254 if(fname[0] != '\0'){
255 strcpy(ffname, fname);
256 strcat(ffname, "_ss.ps");
257 } else strcpy(ffname, "rna.ps");
260 if (!noPS) (void) PS_rna_plot_a_gquad(orig_sequence, structure, ffname, NULL, NULL);
262 if (!noPS) (void) PS_rna_plot_a(orig_sequence, structure, ffname, NULL, NULL);
265 if (length>2000) free_arrays();
267 char *pf_struc = (char *) space((unsigned) length+1);
269 md.dangles=2; /* recompute with dangles as in pf_fold() */
270 min_en = (circular) ? energy_of_circ_struct_par(rec_sequence, structure, mfe_parameters, 0) : energy_of_struct_par(rec_sequence, structure, mfe_parameters, 0);
276 kT = (betaScale*((temperature+K0)*GASCONST))/1000.; /* in Kcal */
277 pf_scale = exp(-(sfact*min_en)/kT/length);
278 if (length>2000) fprintf(stderr, "scaling factor %f\n", pf_scale);
280 if (cstruc!=NULL) strncpy(pf_struc, cstruc, length+1);
282 pf_parameters = get_boltzmann_factors(temperature, betaScale, md, pf_scale);
283 energy = pf_fold_par(rec_sequence, pf_struc, pf_parameters, do_backtrack, fold_constrained, circular);
287 char *s = (circular) ? pbacktrack_circ(rec_sequence) : pbacktrack(rec_sequence);
288 min_en = (circular) ? energy_of_circ_struct_par(rec_sequence, s, mfe_parameters, 0) : energy_of_struct_par(rec_sequence, s, mfe_parameters, 0);
289 printf("%s\n%s", orig_sequence, s);
291 printf("\n free energy = %6.2f kcal/mol\n", min_en);
293 printf(" (%6.2f)\n", min_en);
294 (void) fflush(stdout);
295 if(fname[0] != '\0'){
296 strcpy(ffname, fname);
297 strcat(ffname, "_ss.ps");
298 } else strcpy(ffname, "rna.ps");
300 if (!noPS) (void) PS_rna_plot(orig_sequence, s, ffname);
306 printf("%s", pf_struc);
307 if (!istty) printf(" [%6.2f]\n", energy);
310 if ((istty)||(!do_backtrack))
311 printf(" free energy of ensemble = %6.2f kcal/mol\n", energy);
317 double dist, cent_en;
318 FLT_OR_DBL *probs = export_bppm();
321 assign_plist_gquad_from_pr(&pl1, length, bppmThreshold);
323 assign_plist_from_pr(&pl1, probs, length, bppmThreshold);
325 assign_plist_from_db(&pl2, structure, 0.95*0.95);
326 /* cent = centroid(length, &dist); <- NOT THREADSAFE */
329 cent = get_centroid_struct_gquad_pr(length, &dist);
330 cent_en = energy_of_gquad_structure((const char *)rec_sequence, (const char *)cent, 0);
332 cent = get_centroid_struct_pr(length, &dist, probs);
333 cent_en = (circular) ? energy_of_circ_struct_par(rec_sequence, cent, mfe_parameters, 0) : energy_of_struct_par(rec_sequence, cent, mfe_parameters, 0);
336 printf("%s {%6.2f d=%.2f}\n", cent, cent_en, dist);
338 if (fname[0]!='\0') {
339 strcpy(ffname, fname);
340 strcat(ffname, "_dp.ps");
341 } else strcpy(ffname, "dot.ps");
342 (void) PS_dot_plot_list(orig_sequence, ffname, pl1, pl2, "");
344 if (do_backtrack==2) {
345 pl2 = stackProb(1e-5);
346 if (fname[0]!='\0') {
347 strcpy(ffname, fname);
348 strcat(ffname, "_dp2.ps");
349 } else strcpy(ffname, "dot2.ps");
350 PS_dot_plot_list(orig_sequence, ffname, pl1, pl2,
351 "Probabilities for stacked pairs (i,j)(i+1,j-1)");
359 assign_plist_from_pr(&pl, probs, length, 1e-4/(1+MEAgamma));
362 mea = MEA_seq(pl, rec_sequence, structure, MEAgamma, pf_parameters);
363 mea_en = energy_of_gquad_structure((const char *)rec_sequence, (const char *)structure, 0);
364 printf("%s {%6.2f MEA=%.2f}\n", structure, mea_en, mea);
366 mea = MEA(pl, structure, MEAgamma);
367 mea_en = (circular) ? energy_of_circ_struct_par(rec_sequence, structure, mfe_parameters, 0) : energy_of_struct_par(rec_sequence, structure, mfe_parameters, 0);
368 printf("%s {%6.2f MEA=%.2f}\n", structure, mea_en, mea);
374 printf(" frequency of mfe structure in ensemble %g; ", exp((energy-min_en)/kT));
376 printf("ensemble diversity %-6.2f", mean_bp_distance(length));
382 (void) fflush(stdout);
385 if(cstruc) free(cstruc);
386 if(rec_id) free(rec_id);
390 /* free the rest of current dataset */
392 for(i=0;rec_rest[i];i++) free(rec_rest[i]);
395 rec_id = rec_sequence = structure = cstruc = NULL;
398 /* print user help for the next round if we get input from tty */
400 if(fold_constrained){
401 print_tty_constraint_full();
402 print_tty_input_seq_str("Input sequence (upper or lower case) followed by structure constraint");
404 else print_tty_input_seq();
408 free(mfe_parameters);