1 /* Last changed Time-stamp: <2008-07-02 17:10:04 berni> */
3 Ineractive Access to folding Routines
16 #include "part_func.h"
17 #include "fold_vars.h"
20 #include "energy_const.h"
21 #include "read_epars.h"
24 #include "RNAplfold_cmdl.h"
27 static char rcsid[] = "$Id: RNAplfold.c,v 1.10 2008/07/02 15:34:24 ivo Exp $";
30 PRIVATE void putout_pup(double *pup,int length, int winsize, char *name);
31 PRIVATE void putoutpU_G(double **pU,int length, int ulength, FILE *fp);
32 PRIVATE void putoutphakim_u(double **pU,int length, int ulength, FILE *fp);
34 /*--------------------------------------------------------------------------*/
35 int main(int argc, char *argv[]){
36 struct RNAplfold_args_info args_info;
37 unsigned int error = 0;
38 char fname[FILENAME_MAX_LENGTH], ffname[FILENAME_MAX_LENGTH], *c, *structure, *ParamFile, *ns_bases, *rec_sequence, *rec_id, **rec_rest, *orig_sequence;
39 unsigned int input_type;
40 int i, length, l, sym, r, istty, winsize, pairdist;
42 int tempwin, temppair, tempunpaired;
43 FILE *pUfp = NULL, *spup = NULL;
44 double **pup = NULL; /*prob of being unpaired, lengthwise*/
45 int noconv, plexoutput, simply_putout, openenergies, binaries;
46 plist *pl, *dpp = NULL;
47 unsigned int rec_type, read_opt;
49 pf_paramT *pf_parameters;
58 simply_putout = plexoutput = openenergies = noconv = 0;binaries=0;
59 tempwin = temppair = tempunpaired = 0;
60 structure = ParamFile = ns_bases = NULL;
61 rec_type = read_opt = 0;
62 rec_id = rec_sequence = orig_sequence = NULL;
66 set_model_details(&md);
69 #############################################
70 # check the command line parameters
71 #############################################
73 if(RNAplfold_cmdline_parser (argc, argv, &args_info) != 0) exit(1);
75 if(args_info.temp_given) temperature = args_info.temp_arg;
76 /* do not take special tetra loop energies into account */
77 if(args_info.noTetra_given) md.special_hp = tetra_loop=0;
78 /* set dangle model */
79 if(args_info.dangles_given){
80 if((args_info.dangles_arg != 0) && (args_info.dangles_arg != 2))
81 warn_user("required dangle model not implemented, falling back to default dangles=2");
83 md.dangles = dangles = args_info.dangles_arg;
85 /* do not allow weak pairs */
86 if(args_info.noLP_given) md.noLP = noLonelyPairs = 1;
87 /* do not allow wobble pairs (GU) */
88 if(args_info.noGU_given) md.noGU = noGU = 1;
89 /* do not allow weak closing pairs (AU,GU) */
90 if(args_info.noClosingGU_given) md.noGUclosure = no_closingGU = 1;
91 /* do not convert DNA nucleotide "T" to appropriate RNA "U" */
92 if(args_info.noconv_given) noconv = 1;
93 /* set energy model */
94 if(args_info.energyModel_given) energy_set = args_info.energyModel_arg;
95 /* take another energy parameter set */
96 if(args_info.paramFile_given) ParamFile = strdup(args_info.paramFile_arg);
97 /* Allow other pairs in addition to the usual AU,GC,and GU pairs */
98 if(args_info.nsp_given) ns_bases = strdup(args_info.nsp_arg);
99 /* set the maximum base pair span */
100 if(args_info.span_given) pairdist = args_info.span_arg;
101 /* set the pair probability cutoff */
102 if(args_info.cutoff_given) cutoff = args_info.cutoff_arg;
103 /* set the windowsize */
104 if(args_info.winsize_given) winsize = args_info.winsize_arg;
105 /* set the length of unstructured region */
106 if(args_info.ulength_given) unpaired = args_info.ulength_arg;
107 /* compute opening energies */
108 if(args_info.opening_energies_given) openenergies = 1;
109 /* print output on the fly */
110 if(args_info.print_onthefly_given) simply_putout = 1;
111 /* turn on RNAplex output */
112 if(args_info.plex_output_given) plexoutput = 1;
113 /* turn on binary output*/
114 if(args_info.binaries_given) binaries = 1;
116 if(args_info.betaScale_given) betaScale = args_info.betaScale_arg;
118 /* check for errorneous parameter options */
119 if((pairdist < 0) || (cutoff < 0.) || (unpaired < 0) || (winsize < 0)){
120 RNAplfold_cmdline_parser_print_help();
124 /* free allocated memory of command line data structure */
125 RNAplfold_cmdline_parser_free(&args_info);
128 #############################################
130 #############################################
132 if (ParamFile != NULL)
133 read_parameter_file(ParamFile);
135 if (ns_bases != NULL) {
136 nonstandards = space(33);
144 nonstandards[i++]=*c++;
145 nonstandards[i++]=*c;
146 if ((sym)&&(*c!=*(c-1))) {
147 nonstandards[i++]=*c;
148 nonstandards[i++]=*(c-1);
155 /* check parameter options again and reset to reasonable values if needed */
156 if(openenergies && !unpaired) unpaired = 31;
157 if(pairdist == 0) pairdist = winsize;
158 if(pairdist > winsize){
159 fprintf(stderr, "pairdist (-L %d) should be <= winsize (-W %d);"
160 "Setting pairdist=winsize\n",pairdist, winsize);
164 warn_user("using default dangles = 2");
168 istty = isatty(fileno(stdout))&&isatty(fileno(stdin));
169 read_opt |= VRNA_INPUT_NO_REST;
171 print_tty_input_seq();
172 read_opt |= VRNA_INPUT_NOSKIP_BLANK_LINES;
176 #############################################
177 # main loop: continue until end of file
178 #############################################
181 !((rec_type = read_record(&rec_id, &rec_sequence, &rec_rest, read_opt))
182 & (VRNA_INPUT_ERROR | VRNA_INPUT_QUIT))){
185 ########################################################
186 # init everything according to the data we've read
187 ########################################################
190 if(!istty) printf("%s\n", rec_id);
191 (void) sscanf(rec_id, ">%" XSTR(FILENAME_ID_LENGTH) "s", fname);
193 else fname[0] = '\0';
195 length = (int)strlen(rec_sequence);
196 structure = (char *) space((unsigned) length+1);
198 /* convert DNA alphabet to RNA if not explicitely switched off */
199 if(!noconv) str_DNA2RNA(rec_sequence);
200 /* store case-unmodified sequence */
201 orig_sequence = strdup(rec_sequence);
202 /* convert sequence to uppercase letters only */
203 str_uppercase(rec_sequence);
205 if(istty) printf("length = %d\n", length);
208 ########################################################
209 # done with 'stdin' handling
210 ########################################################
213 if(length > 1000000){
214 if(!simply_putout && !unpaired){
215 printf("Switched to simple output mode!!!\n");
219 if(unpaired && simply_putout){
220 printf("Output simplification not possible if unpaired is switched on\n");
224 /* restore winsize if altered before */
229 /* restore pairdist if altered before */
234 /* restore ulength if altered before */
235 if(tempunpaired != 0){
236 unpaired = tempunpaired;
240 /* adjust winsize, pairdist and ulength if necessary */
241 if(length < winsize){
242 fprintf(stderr, "WARN: window size %d larger than sequence length %d\n", winsize, length);
245 if (pairdist>winsize) {
249 if (unpaired>winsize) {
250 tempunpaired=unpaired;
256 ########################################################
257 # begin actual computations
258 ########################################################
262 /* construct output file names */
263 char fname1[FILENAME_MAX_LENGTH], fname2[FILENAME_MAX_LENGTH], fname3[FILENAME_MAX_LENGTH], fname4[FILENAME_MAX_LENGTH], fname_t[FILENAME_MAX_LENGTH];
265 strcpy(fname_t, (fname[0] != '\0') ? fname : "plfold");
267 strcpy(fname1, fname_t);
268 strcpy(fname2, fname_t);
269 strcpy(fname3, fname_t);
270 strcpy(fname4, fname_t);
271 strcpy(ffname, fname_t);
273 strcat(fname1, "_lunp");
274 strcat(fname2, "_basepairs");
275 strcat(fname3, "_uplex");
277 strcat(fname4, "_openen_bin");
280 strcat(fname4, "_openen");
282 strcat(ffname, "_dp.ps");
284 pf_parameters = get_boltzmann_factors(temperature, betaScale, md, -1);
287 pup =(double **) space((length+1)*sizeof(double *));
288 pup[0] =(double *) space(sizeof(double)); /*I only need entry 0*/
289 pup[0][0] = unpaired;
294 spup = fopen(fname2, "w");
295 pUfp = (unpaired > 0) ? fopen(fname1, "w") : NULL;
297 pl = pfl_fold_par(rec_sequence, winsize, pairdist, cutoff, pup, &dpp, pUfp, spup, pf_parameters);
299 if(pUfp != NULL) fclose(pUfp);
300 if(spup != NULL) fclose(spup);
303 pl = pfl_fold_par(rec_sequence, winsize, pairdist, cutoff, pup, &dpp, pUfp, spup, pf_parameters);
304 PS_dot_plot_turn(orig_sequence, pl, ffname, pairdist);
307 pUfp = fopen(fname3, "w");
308 putoutphakim_u(pup,length, unpaired, pUfp);
311 pUfp = fopen(openenergies ? fname4 : fname1, "w");
313 putoutpU_prob_bin_par(pup, length, unpaired, pUfp, openenergies, pf_parameters);
316 putoutpU_prob_par(pup, length, unpaired, pUfp, openenergies, pf_parameters);
329 (void) fflush(stdout);
332 if(rec_id) free(rec_id);
336 rec_id = rec_sequence = orig_sequence = NULL;
338 /* print user help for the next round if we get input from tty */
340 if(istty) print_tty_input_seq();
345 /* additional output functions */
347 PRIVATE void putout_pup(double *pup,int length, int winsize, char *name) {
354 fprintf(FP,"&prob of being unpaired between i-%d and i\n",unpaired);
356 for (i=unpaired; i<=length; i++) {
359 factor=1./(i-unpaired+1);
361 if (i>length-winsize+unpaired-1) {
362 tfact=1./(length-i+1);
369 tfact=1./(winsize-unpaired+1);
374 fprintf(FP,"%d %.6f\n",i,pup[i]*factor);
379 PRIVATE void putoutpU_G(double **pU,int length, int ulength, FILE *fp) {
380 /*put out unpaireds */
382 fprintf(fp,"#unpaired probabilities\n #i\tl=");
383 for (i=1; i<=ulength; i++) {
384 fprintf(fp,"%d\t", i);
387 for (k=1; k<=length; k++){
388 fprintf(fp,"%d\t",k);
389 for (i=1; i<=ulength; i++) {
390 if (k+(i-1)<=length) fprintf(fp,"%.7g\t",pU[k+(i-1)][i]);
399 PRIVATE void putoutphakim_u(double **pU,int length, int ulength, FILE *fp) {
400 /*put out Fopen in dekacalories per mol, and F(cond,open) also in dekacal*/
403 float RT = (temperature+K0)*GASCONST;
409 fprintf(fp,"#energy necessary to unpair as well as to unpair if i-1 is unpaired also, if i+1 is unpaired also in dekacal/mol\n");
410 for (k=1; k<=length; k++){
411 fprintf(fp,"%d\t",k);
413 f0=(int) -RT*log(p0)/10;
414 fprintf(fp,"%d\t", f0);
416 pdep=pU[k][2]/pU[k-1][1];
417 fdep=(int) -RT*log(pdep)/10;
418 fprintf(fp,"%d\t",fdep);
422 else fprintf(fp,"-0\t");
424 pdep=pU[k+1][2]/pU[k+1][1];
425 fdep=(int) -RT*log(pdep)/10;
426 fprintf(fp,"%d\t",fdep);
428 else fprintf(fp,"-0\t");