1 /* Last changed Time-stamp: <2008-12-03 16:38:01 ivo> */
3 Ineractive access to suboptimal folding
15 #include "part_func.h"
18 #include "fold_vars.h"
20 #include "read_epars.h"
23 #include "RNAsubopt_cmdl.h"
26 static char UNUSED rcsid[] = "$Id: RNAsubopt.c,v 1.20 2008/12/03 16:55:44 ivo Exp $";
28 PRIVATE char *tokenize(char *line);
29 PRIVATE void putoutzuker(SOLUTION* zukersolution);
31 int main(int argc, char *argv[]){
32 struct RNAsubopt_args_info args_info;
33 unsigned int input_type;
34 unsigned int rec_type, read_opt;
35 char fname[FILENAME_MAX_LENGTH], *c, *input_string, *rec_sequence, *rec_id, **rec_rest, *orig_sequence;
36 char *cstruc, *structure, *ParamFile, *ns_bases;
37 int i, length, l, cl, sym, istty;
38 double deltaf, deltap, betaScale;
39 int delta, n_back, noconv, circular, dos, zuker;
41 pf_paramT *pf_parameters;
48 deltap = n_back = noconv = circular = dos = zuker = 0;
49 rec_type = read_opt = 0;
50 rec_id = rec_sequence = orig_sequence = NULL;
52 input_string = c = cstruc = structure = ParamFile = ns_bases = NULL;
56 set_model_details(&md);
59 #############################################
60 # check the command line parameters
61 #############################################
63 if(RNAsubopt_cmdline_parser (argc, argv, &args_info) != 0) exit(1);
65 if(args_info.temp_given) temperature = args_info.temp_arg;
66 /* structure constraint */
67 if(args_info.constraint_given) fold_constrained=1;
68 /* do not take special tetra loop energies into account */
69 if(args_info.noTetra_given) md.special_hp = tetra_loop=0;
70 /* set dangle model */
71 if(args_info.dangles_given){
72 if((args_info.dangles_arg < 0) || (args_info.dangles_arg > 3))
73 warn_user("required dangle model not implemented, falling back to default dangles=2");
75 md.dangles = dangles = args_info.dangles_arg;
77 /* do not allow weak pairs */
78 if(args_info.noLP_given) md.noLP = noLonelyPairs = 1;
79 /* do not allow wobble pairs (GU) */
80 if(args_info.noGU_given) md.noGU = noGU = 1;
81 /* do not allow weak closing pairs (AU,GU) */
82 if(args_info.noClosingGU_given) md.noGUclosure = no_closingGU = 1;
83 /* do not convert DNA nucleotide "T" to appropriate RNA "U" */
84 if(args_info.noconv_given) noconv = 1;
85 /* take another energy parameter set */
86 if(args_info.paramFile_given) ParamFile = strdup(args_info.paramFile_arg);
87 /* Allow other pairs in addition to the usual AU,GC,and GU pairs */
88 if(args_info.nsp_given) ns_bases = strdup(args_info.nsp_arg);
90 if(args_info.deltaEnergy_given) delta = (int) (0.1+args_info.deltaEnergy_arg*100);
91 /* energy range after post evaluation */
92 if(args_info.deltaEnergyPost_given) deltap = args_info.deltaEnergyPost_arg;
94 if(args_info.sorted_given) subopt_sorted = 1;
95 /* assume RNA sequence to be circular */
96 if(args_info.circ_given) circular=1;
97 /* stochastic backtracking */
98 if(args_info.stochBT_given){
99 n_back = args_info.stochBT_arg;
102 if(args_info.betaScale_given) betaScale = args_info.betaScale_arg;
103 /* density of states */
104 if(args_info.dos_given){
106 print_energy = -999999;
108 /* logarithmic multiloop energies */
109 if(args_info.logML_given) md.logML = logML = 1;
111 if(args_info.zuker_given) zuker = 1;
115 warn_user("Sorry, zuker subopts not yet implemented for circfold");
116 RNAsubopt_cmdline_parser_print_help();
120 warn_user("Can't do zuker subopts and stochastic subopts at the same time");
121 RNAsubopt_cmdline_parser_print_help();
126 /* free allocated memory of command line data structure */
127 RNAsubopt_cmdline_parser_free(&args_info);
130 #############################################
132 #############################################
135 if (ParamFile != NULL) read_parameter_file(ParamFile);
137 if (ns_bases != NULL) {
138 nonstandards = space(33);
146 nonstandards[i++]=*c++;
147 nonstandards[i++]=*c;
148 if ((sym)&&(*c!=*(c-1))) {
149 nonstandards[i++]=*c;
150 nonstandards[i++]=*(c-1);
157 istty = isatty(fileno(stdout))&&isatty(fileno(stdin));
159 /* print user help if we get input from tty */
162 printf("Use '&' to connect 2 sequences that shall form a complex.\n");
163 if(fold_constrained){
164 print_tty_constraint(VRNA_CONSTRAINT_DOT | VRNA_CONSTRAINT_X | VRNA_CONSTRAINT_ANG_BRACK | VRNA_CONSTRAINT_RND_BRACK);
165 print_tty_input_seq_str("Input sequence (upper or lower case) followed by structure constraint\n");
167 else print_tty_input_seq();
170 /* set options we wanna pass to read_record */
171 if(istty) read_opt |= VRNA_INPUT_NOSKIP_BLANK_LINES;
172 if(!fold_constrained) read_opt |= VRNA_INPUT_NO_REST;
174 P = get_scaled_parameters(temperature, md);
177 #############################################
178 # main loop: continue until end of file
179 #############################################
182 !((rec_type = read_record(&rec_id, &rec_sequence, &rec_rest, read_opt))
183 & (VRNA_INPUT_ERROR | VRNA_INPUT_QUIT))){
186 ########################################################
187 # init everything according to the data we've read
188 ########################################################
191 /* if(!istty) printf("%s\n", rec_id); */
192 (void) sscanf(rec_id, ">%" XSTR(FILENAME_ID_LENGTH) "s", fname);
194 else fname[0] = '\0';
198 rec_sequence = tokenize(rec_sequence); /* frees input_string and sets cut_point */
199 length = (int) strlen(rec_sequence);
200 structure = (char *) space((unsigned) length+1);
202 /* parse the rest of the current dataset to obtain a structure constraint */
203 if(fold_constrained){
206 unsigned int coptions = (rec_id) ? VRNA_CONSTRAINT_MULTILINE : 0;
207 coptions |= VRNA_CONSTRAINT_DOT | VRNA_CONSTRAINT_X | VRNA_CONSTRAINT_ANG_BRACK | VRNA_CONSTRAINT_RND_BRACK;
208 getConstraint(&cstruc, (const char **)rec_rest, coptions);
209 cstruc = tokenize(cstruc);
210 if(cut_point != cp) nrerror("cut point in sequence and structure constraint differs");
211 cl = (cstruc) ? (int)strlen(cstruc) : 0;
213 if(cl == 0) warn_user("structure constraint is missing");
214 else if(cl < length) warn_user("structure constraint is shorter than sequence");
215 else if(cl > length) nrerror("structure constraint is too long");
217 if(cstruc) strncpy(structure, cstruc, sizeof(char)*(cl+1));
220 /* convert DNA alphabet to RNA if not explicitely switched off */
221 if(!noconv) str_DNA2RNA(rec_sequence);
222 /* store case-unmodified sequence */
223 orig_sequence = strdup(rec_sequence);
224 /* convert sequence to uppercase letters only */
225 str_uppercase(rec_sequence);
229 printf("length = %d\n", length);
231 printf("length1 = %d\nlength2 = %d\n", cut_point-1, length-cut_point+1);
235 ########################################################
236 # begin actual computations
237 ########################################################
240 if((logML != 0 || dangles==1 || dangles==3) && dos == 0)
241 if(deltap<=0) deltap = delta/100. + 0.001;
243 print_energy = deltap;
245 /* stochastic backtracking */
249 if (fname[0] != '\0') printf(">%s\n", fname);
252 ss = (char *) space(strlen(rec_sequence)+1);
253 strncpy(ss, structure, length);
254 mfe = fold_par(rec_sequence, ss, P, fold_constrained, circular);
255 kT = (betaScale*((temperature+K0)*GASCONST))/1000.; /* in Kcal */
256 pf_scale = exp(-(1.03*mfe)/kT/length);
257 strncpy(ss, structure, length);
259 pf_parameters = get_boltzmann_factors(temperature, betaScale, md, pf_scale);
260 /* ignore return value, we are not interested in the free energy */
261 (void) pf_fold_par(rec_sequence, ss, pf_parameters, do_backtrack, fold_constrained, circular);
263 for (i=0; i<n_back; i++) {
265 s =(circular) ? pbacktrack_circ(rec_sequence) : pbacktrack(rec_sequence);
274 /* first lines of output (suitable for sort +1n) */
275 if (fname[0] != '\0')
276 printf("> %s [%d]\n", fname, delta);
278 subopt_par(rec_sequence, structure, P, delta, fold_constrained, circular, stdout);
281 for (i=0; i<= MAXDOS && i<=delta/10; i++) {
282 printf("%4d %6d\n", i, density_of_states[i]);
286 /* Zuker suboptimals */
290 if (fname[0] != '\0') printf(">%s\n%s\n", fname, rec_sequence);
293 nrerror("Sorry, zuker subopts not yet implemented for cofold\n");
295 zr = zukersubopt(rec_sequence);
297 (void)fflush(stdout);
298 for (i=0; zr[i].structure; i++) {
299 free(zr[i].structure);
304 (void)fflush(stdout);
307 if(cstruc) free(cstruc);
308 if(rec_id) free(rec_id);
312 /* free the rest of current dataset */
314 for(i=0;rec_rest[i];i++) free(rec_rest[i]);
317 rec_id = rec_sequence = orig_sequence = structure = cstruc = NULL;
320 /* print user help for the next round if we get input from tty */
323 printf("Use '&' to connect 2 sequences that shall form a complex.\n");
324 if(fold_constrained){
325 print_tty_constraint(VRNA_CONSTRAINT_DOT | VRNA_CONSTRAINT_X | VRNA_CONSTRAINT_ANG_BRACK | VRNA_CONSTRAINT_RND_BRACK);
326 print_tty_input_seq_str("Input sequence (upper or lower case) followed by structure constraint\n");
328 else print_tty_input_seq();
335 PRIVATE char *tokenize(char *line)
340 copy = (char *) space(strlen(line)+1);
341 (void) sscanf(line, "%s", copy);
342 pos = strchr(copy, '&');
344 cut = (int) (pos-copy)+1;
345 if (cut >= strlen(copy)) cut = -1;
346 if (strchr(pos+1, '&')) nrerror("more than one cut-point in input");
347 for (;*pos;pos++) *pos = *(pos+1); /* splice out the & */
350 if (cut_point==-1) cut_point = cut;
351 else if (cut_point != cut) {
352 fprintf(stderr,"cut_point = %d cut = %d\n", cut_point, cut);
353 nrerror("Sequence and Structure have different cut points.");
360 PRIVATE void putoutzuker(SOLUTION* zukersolution) {
362 printf("%s [%.2f]\n",zukersolution[0].structure,zukersolution[0].energy/100.);
363 for(i=1; zukersolution[i].structure; i++) {
364 printf("%s [%.2f]\n", zukersolution[i].structure, zukersolution[i].energy/100.);