10 #include "fold_vars.h"
13 #include "energy_par.h"
14 #include "read_epars.h"
17 #include "RNA2Dfold_cmdl.h"
23 typedef struct nbhoods{
29 int main(int argc, char *argv[]){
30 struct RNA2Dfold_args_info args_info;
31 unsigned int input_type;
32 char *string, *input_string, *orig_sequence;
33 char *mfe_structure=NULL, *structure1=NULL, *structure2=NULL, *reference_struc1=NULL, *reference_struc2=NULL;
37 double kT, sfact=1.07;
41 int maxDistance1 = -1;
42 int maxDistance2 = -1;
48 struct nbhoods *neighborhoods = NULL;
49 struct nbhoods *neighborhoods_cur = NULL;
51 string = input_string = orig_sequence = NULL;
53 #############################################
54 # check the command line prameters
55 #############################################
57 if(RNA2Dfold_cmdline_parser (argc, argv, &args_info) != 0) exit(1);
60 if(args_info.temp_given)
61 temperature = args_info.temp_arg;
63 /* max distance to 1st reference structure */
64 if(args_info.maxDist1_given)
65 maxDistance1 = args_info.maxDist1_arg;
67 /* max distance to 2nd reference structure */
68 if(args_info.maxDist2_given)
69 maxDistance2 = args_info.maxDist2_arg;
71 /* compute partition function and boltzmann probabilities */
72 if(args_info.partfunc_given)
75 /* do stachastic backtracking */
76 if(args_info.stochBT_given){
79 nstBT = args_info.stochBT_arg;
82 if(args_info.noTetra_given)
85 /* assume RNA sequence to be circular */
86 if(args_info.circ_given)
90 if(args_info.dangles_given){
91 if((args_info.dangles_arg != 0) && (args_info.dangles_arg != 2))
92 warn_user("required dangle model not implemented, falling back to default dangles=2");
94 dangles = args_info.dangles_arg;
96 /* set number of threads for parallel computation */
97 if(args_info.numThreads_given)
99 omp_set_num_threads(args_info.numThreads_arg);
101 nrerror("\'j\' option is available only if compiled with OpenMP support!");
104 /* get energy parameter file name */
105 if(args_info.parameterFile_given)
106 ParamFile = strdup(args_info.parameterFile_arg);
108 /* do not allow GU pairs ? */
109 if(args_info.noGU_given)
112 /* do not allow GU pairs at the end of helices? */
113 if(args_info.noClosingGU_given)
116 /* pf scaling factor */
117 if(args_info.pfScale_given)
118 sfact = args_info.pfScale_arg;
120 /* do not backtrack structures ? */
121 if(args_info.noBT_given)
124 for (i = 0; i < args_info.neighborhood_given; i++){
127 if(sscanf(args_info.neighborhood_arg[i], "%d:%d", &kappa, &lambda) == 2);
128 if ((kappa>-2) && (lambda>-2)){
129 if(neighborhoods_cur != NULL){
130 neighborhoods_cur->next = (nbhoods *)space(sizeof(nbhoods));
131 neighborhoods_cur = neighborhoods_cur->next;
134 neighborhoods = (nbhoods *)space(sizeof(nbhoods));
135 neighborhoods_cur = neighborhoods;
137 neighborhoods_cur->k = kappa;
138 neighborhoods_cur->l = lambda;
139 neighborhoods_cur->next = NULL;
142 /* free allocated memory of command line data structure */
143 RNA2Dfold_cmdline_parser_free (&args_info);
146 #############################################
147 # begin actual program code
148 #############################################
150 if (ParamFile != NULL)
151 read_parameter_file(ParamFile);
153 istty = isatty(fileno(stdout))&&isatty(fileno(stdin));
156 #############################################
157 # main loop, continue until end of file
158 #############################################
162 print_tty_input_seq_str("Input strings\n1st line: sequence (upper or lower case)\n2nd + 3rd line: reference structures (dot bracket notation)\n@ to quit\n");
164 while((input_type = get_input_line(&input_string, 0)) & VRNA_INPUT_FASTA_HEADER){
165 printf(">%s\n", input_string); /* print fasta header if available */
169 /* break on any error, EOF or quit request */
170 if(input_type & (VRNA_INPUT_QUIT | VRNA_INPUT_ERROR)){ break;}
171 /* else assume a proper sequence of letters of a certain alphabet (RNA, DNA, etc.) */
173 length = (int) strlen(input_string);
174 string = strdup(input_string);
178 mfe_structure = (char *) space((unsigned) length+1);
179 structure1 = (char *) space((unsigned) length+1);
180 structure2 = (char *) space((unsigned) length+1);
182 input_type = get_input_line(&input_string, VRNA_INPUT_NOSKIP_COMMENTS);
183 if(input_type & VRNA_INPUT_QUIT){ break;}
184 else if((input_type & VRNA_INPUT_MISC) && (strlen(input_string) > 0)){
185 reference_struc1 = strdup(input_string);
187 if(strlen(reference_struc1) != length)
188 nrerror("sequence and 1st reference structure have unequal length");
190 else nrerror("1st reference structure missing\n");
191 strncpy(structure1, reference_struc1, length);
193 input_type = get_input_line(&input_string, VRNA_INPUT_NOSKIP_COMMENTS);
194 if(input_type & VRNA_INPUT_QUIT){ break;}
195 else if((input_type & VRNA_INPUT_MISC) && (strlen(input_string) > 0)){
196 reference_struc2 = strdup(input_string);
198 if(strlen(reference_struc2) != length)
199 nrerror("sequence and 2nd reference structure have unequal length");
201 else nrerror("2nd reference structure missing\n");
202 strncpy(structure2, reference_struc2, length);
204 /* convert DNA alphabet to RNA if not explicitely switched off */
205 if(!noconv) str_DNA2RNA(string);
206 /* store case-unmodified sequence */
207 orig_sequence = strdup(string);
208 /* convert sequence to uppercase letters only */
209 str_uppercase(string);
211 if (istty) printf("length = %d\n", length);
213 min_en = (circ) ? circfold(string, mfe_structure) : fold(string, mfe_structure);
215 printf("%s\n%s", orig_sequence, mfe_structure);
218 printf("\n minimum free energy = %6.2f kcal/mol\n", min_en);
220 printf(" (%6.2f)\n", min_en);
222 printf("%s (%6.2f) <ref 1>\n", structure1, (circ) ? energy_of_circ_structure(string, structure1, 0) : energy_of_structure(string,structure1, 0));
223 printf("%s (%6.2f) <ref 2>\n", structure2, (circ) ? energy_of_circ_structure(string, structure2, 0) : energy_of_structure(string,structure2, 0));
225 /* get all variables need for the folding process (some memory will be preallocated here too) */
226 TwoDfold_vars *mfe_vars = get_TwoDfold_variables(string, structure1, structure2, circ);
227 mfe_vars->do_backtrack = do_backtrack;
228 TwoDfold_solution *mfe_s = TwoDfoldList(mfe_vars, maxDistance1, maxDistance2);
232 printf("k\tl\tn\tMFE\tMFE-structure\n");
233 for(i = 0; mfe_s[i].k != INF; i++){
234 printf("%d\t%d\t%lu\t%6.2f\t%s\n", mfe_s[i].k, mfe_s[i].l, mfe_vars->N_F5[length][mfe_s[i].k][mfe_s[i].l/2], mfe_s[i].en, mfe_s[i].s);
235 if(mfe_s[i].s) free(mfe_s[i].s);
239 printf("k\tl\tMFE\tMFE-structure\n");
240 for(i = 0; mfe_s[i].k != INF; i++){
241 printf("%d\t%d\t%6.2f\t%s\n", mfe_s[i].k, mfe_s[i].l, mfe_s[i].en, mfe_s[i].s);
242 if(mfe_s[i].s) free(mfe_s[i].s);
249 int maxD1 = (int) mfe_vars->maxD1;
250 int maxD2 = (int) mfe_vars->maxD2;
253 for(i = 0; mfe_s[i].k != INF; i++){
254 if(mmfe > mfe_s[i].en) mmfe = mfe_s[i].en;
256 kT = (temperature+K0)*GASCONST/1000.0; /* in Kcal */
257 pf_scale = exp(-(sfact*mmfe)/kT/length);
259 fprintf(stdout, "scaling factor %f\n", pf_scale);
261 /* get all variables need for the folding process (some memory will be preallocated there too) */
262 /* TwoDpfold_vars *q_vars = get_TwoDpfold_variables_from_MFE(mfe_vars); */
263 /* we dont need the mfe vars and arrays anymore, so we can savely free their occupying memory */
264 destroy_TwoDfold_variables(mfe_vars);
265 TwoDpfold_vars *q_vars = get_TwoDpfold_variables(string, structure1, structure2, circ);
267 TwoDpfold_solution *pf_s = TwoDpfoldList(q_vars, maxD1, maxD2);
271 for(i = 0; pf_s[i].k != INF; i++){
275 double fee = (-log(Q)-length*log(pf_scale))*kT;
278 printf("free energy of ensemble = %6.2f kcal/mol\n",fee);
279 printf("k\tl\tP(neighborhood)\tP(MFE in neighborhood)\tP(MFE in ensemble)\tMFE\tE_gibbs\tMFE-structure\n");
280 for(i=0; pf_s[i].k != INF;i++){
281 float free_energy = (-log((float)pf_s[i].q)-length*log(pf_scale))*kT;
282 if((pf_s[i].k != mfe_s[i].k) || (pf_s[i].l != mfe_s[i].l))
283 nrerror("This should never happen!");
285 "%d\t%d\t%2.8f\t%2.8f\t%2.8f\t%6.2f\t%6.2f\t%s\n",
288 (float)(pf_s[i].q)/(float)Q,
289 exp((free_energy-mfe_s[i].en)/kT),
290 exp((fee-mfe_s[i].en)/kT),
298 printf("k\tl\ten\tstructure\n");
299 if(neighborhoods != NULL){
301 for(tmp = neighborhoods; tmp != NULL; tmp = tmp->next){
305 for(i = 0; i < nstBT; i++){
306 char *s = TwoDpfold_pbacktrack(q_vars, k, l);
307 printf("%d\t%d\t%6.2f\t%s\n", k, l, q_vars->circ ? energy_of_circ_structure(q_vars->sequence, s, 0) : energy_of_structure(q_vars->sequence, s, 0), s);
312 for(i=0; pf_s[i].k != INF;i++){
313 for(l = 0; l < nstBT; l++){
314 char *s = TwoDpfold_pbacktrack(q_vars, pf_s[i].k, pf_s[i].l);
315 printf("%d\t%d\t%6.2f\t%s\n", pf_s[i].k, pf_s[i].l, q_vars->circ ? energy_of_circ_structure(q_vars->sequence, s, 0) : energy_of_structure(q_vars->sequence, s, 0), s);
322 for(i=0; mfe_s[i].k != INF;i++){
323 if(mfe_s[i].s) free(mfe_s[i].s);
327 /* destroy the q_vars */
328 destroy_TwoDpfold_variables(q_vars);
331 destroy_TwoDfold_variables(mfe_vars);
339 free(reference_struc1);
340 free(reference_struc2);
341 string = orig_sequence = mfe_structure = NULL;