1 /* Last changed Time-stamp: <2010-06-30 17:42:12 wolfgang> */
3 Compute pseudoknotted structure of an RNA
15 #include "fold_vars.h"
17 #include "energy_const.h"
19 #include "RNAPKplex_cmdl.h"
23 #include "read_epars.h"
26 int PlexHit_cmp (const void *c1, const void *c2);
27 int PlexHit_cmp_energy (const void *c1, const void *c2);
29 /*--------------------------------------------------------------------------*/
31 int main(int argc, char *argv[]) {
32 struct PKplex_args_info args_info;
33 char *id_s1, *s1, *orig_s1, *ParamFile, *ns_bases, *c, *plexstring, *constraint;
34 char fname[FILENAME_MAX_LENGTH], *temp, *annotation, **rest;
35 int istty, l, i, j, noconv, length, pairdist, current, unpaired;
36 int winsize, openenergies, sym;
37 double **pup = NULL; /*prob of being unpaired, lengthwise*/
38 FILE *pUfp = NULL, *spup = NULL;
39 plist *pl, *dpp = NULL;
40 float cutoff, constrainedEnergy;
43 unsigned int options=0;
56 ParamFile = ns_bases = NULL;
57 s1 = id_s1 = orig_s1 = NULL;
60 set_model_details(&md);
63 #############################################
64 # check command line parameters
65 #############################################
67 if(PKplex_cmdline_parser (argc, argv, &args_info) != 0) exit(1);
69 if(args_info.temp_given) temperature = args_info.temp_arg;
70 /* do not take special tetra loop energies into account */
71 if(args_info.noTetra_given) md.special_hp = tetra_loop=0;
72 /* do not allow weak pairs */
73 if(args_info.noLP_given) md.noLP = noLonelyPairs = 1;
74 /* do not allow wobble pairs (GU) */
75 if(args_info.noGU_given) md.noGU = noGU = 1;
76 /* do not allow weak closing pairs (AU,GU) */
77 if(args_info.noClosingGU_given) md.noGUclosure = no_closingGU = 1;
78 /* do not convert DNA nucleotide "T" to appropriate RNA "U" */
79 if(args_info.noconv_given) noconv = 1;
80 /* take another energy parameter set */
81 if(args_info.paramFile_given) ParamFile = strdup(args_info.paramFile_arg);
82 /* Allow other pairs in addition to the usual AU,GC,and GU pairs */
83 if(args_info.nsp_given) ns_bases = strdup(args_info.nsp_arg);
84 /* set the pair probability cutoff */
85 if(args_info.cutoff_given) cutoff = args_info.cutoff_arg;
86 /* turn on verbose output (mainly for debugging) */
87 if(args_info.verbose_given) verbose = 1;
88 /* set energy cutoff */
89 if(args_info.energyCutoff_given) pk_penalty = args_info.energyCutoff_arg;
90 /* show suboptimal structures which are better than given value difference */
91 if(args_info.subopts_given) subopts = args_info.subopts_arg;
92 /* free allocated memory of command line data structure */
93 PKplex_cmdline_parser_free(&args_info);
96 #############################################
98 #############################################
100 if (ParamFile != NULL) {
101 read_parameter_file(ParamFile);
104 if (ns_bases != NULL) {
105 nonstandards = space(33);
113 nonstandards[i++]=*c++;
114 nonstandards[i++]=*c;
115 if ((sym)&&(*c!=*(c-1))) {
116 nonstandards[i++]=*c;
117 nonstandards[i++]=*(c-1);
124 istty = isatty(fileno(stdout))&&isatty(fileno(stdin));
125 if(istty) options |= VRNA_INPUT_NOSKIP_BLANK_LINES;
126 options |= VRNA_INPUT_NO_REST;
127 if(istty) print_tty_input_seq();
130 #############################################
131 # main loop: continue until end of file
132 #############################################
134 while(!(read_record(&id_s1, &s1, &rest, options) & (VRNA_INPUT_ERROR | VRNA_INPUT_QUIT))){
136 ########################################################
137 # handle user input from 'stdin'
138 ########################################################
141 if(!istty) printf("%s\n", id_s1);
142 (void) sscanf(id_s1, ">%" XSTR(FILENAME_ID_LENGTH) "s", fname);
145 strcpy(fname, "PKplex");
147 strcat(fname, ".ps");
150 winsize=pairdist=length;
151 unpaired=MIN2(30, length-3);
153 /* convert DNA alphabet to RNA if not explicitely switched off */
154 if(!noconv) str_DNA2RNA(s1);
155 /* store case-unmodified sequence */
156 orig_s1 = strdup(s1);
157 /* convert sequence to uppercase letters only */
160 printf("%s\n", orig_s1);
161 if (verbose) printf("length = %d\n", length);
163 ########################################################
164 # do PLfold computations
165 ########################################################
167 update_fold_params();
172 pup =(double **) space((length+1)*sizeof(double *));
173 pup[0] =(double *) space(sizeof(double)); /*I only need entry 0*/
174 pup[0][0] = unpaired;
178 if (verbose) printf ("Winsize = %d\nPairdist = %d\nUnpaired = %d\n", winsize, pairdist, unpaired);
179 int tempdangles = dangles;
181 pl = pfl_fold(s1, winsize, pairdist, cutoff, pup, &dpp, pUfp, spup);
182 dangles = tempdangles;
185 ########################################################
186 # do Plex computations
187 ########################################################
190 PlexHits = (dupVar *)space(sizeof(dupVar) * PlexHitsArrayLength);
191 double kT= (temperature+K0)*GASCONST/1000.0;
193 /* prepare the accesibility array */
194 access = (int**) space(sizeof(int *) * (unpaired+2));
195 for(i=0; i< unpaired+2; i++){
196 access[i] =(int *) space(sizeof(int) * (length+20));
199 for(i=0;i<length+20;i++){
200 for(j=0;j<unpaired+2;j++){
205 for(i=11;i<length+11;i++){
206 for(j=1;j<unpaired+1;j++){
207 if (pup[i-10][j-1+1]>0) {
208 access[j][i]=rint(100*(-log(pup[i-10][j-1+1]))*kT);
213 access[0][0]=unpaired+2;
215 plexstring = (char *) space(length+1+20);
216 strcpy(plexstring,"NNNNNNNNNN"); /*add NNNNNNNNNN to avoid boundary check*/
217 strcat(plexstring, s1);
218 strcat(plexstring,"NNNNNNNNNN\0");
220 if (verbose) printf("EnergyCutoff = %f\n", pk_penalty);
221 PKLduplexfold_XS(plexstring, access, (int)(-pk_penalty*100)-1, MIN2(12, length-3), 0);
224 ########################################################
225 # analyze Plex output
226 ########################################################
230 PlexHits[NumberOfHits].tb=0;
231 PlexHits[NumberOfHits].te=0;
232 PlexHits[NumberOfHits].qb=0;
233 PlexHits[NumberOfHits].qe=0;
234 PlexHits[NumberOfHits].ddG=0;
235 PlexHits[NumberOfHits].dG1=0;
236 PlexHits[NumberOfHits].dG2=0;
237 PlexHits[NumberOfHits].energy=0;
238 PlexHits[NumberOfHits].structure=NULL;
241 /* first sort all the pre-results descending by their estimated energy */
242 qsort(PlexHits, NumberOfHits, sizeof(dupVar), PlexHit_cmp);
244 /* now we re-evaluate the energies and thereby prune the list of pre-results
245 such that the re-avaluation process is not done too often.
249 char *mfe_struct = NULL;
251 par = get_scaled_parameters(temperature, md);
252 constraint = (char *) space(length+1);
253 mfe_struct = (char *) space(length+1);
255 mfe = mfe_pk = fold_par(s1, mfe_struct, par, 0, 0);
257 printf("%s (%6.2f) [mfe-pkfree]\n", mfe_struct, mfe);
259 for(current = 0; current < NumberOfHits; current++){
260 /* do evaluation for structures above the subopt threshold only */
261 if(!PlexHits[current].inactive){
262 if(PlexHits[current].structure){
263 /* prepare the structure constraint for constrained folding */
264 for(i=0; i < length; i++) constraint[i] = '.';
265 for(i=PlexHits[current].tb - 1; i < PlexHits[current].te; i++) constraint[i] = 'x';
266 for(i=PlexHits[current].qb - 1; i < PlexHits[current].qe; i++) constraint[i] = 'x';
267 constraint[length]='\0';
269 /* energy evaluation */
270 constrainedEnergy = fold_par(s1, constraint, par, 1, 0);
272 /* check if this structure is worth keeping */
273 if(constrainedEnergy + PlexHits[current].ddG + pk_penalty <= mfe_pk + subopts){
274 /* add pseudo-knot brackets to the structure */
275 for(i=PlexHits[current].tb - 1; i < PlexHits[current].te; i++)
276 if(PlexHits[current].structure[i-PlexHits[current].tb+1]=='(')
278 for(i=PlexHits[current].qb - 1; i < PlexHits[current].qe; i++)
279 if(PlexHits[current].structure[i-PlexHits[current].qb+1+1+1+PlexHits[current].te-PlexHits[current].tb]==')')
281 PlexHits[current].energy = constrainedEnergy + PlexHits[current].ddG + (float) pk_penalty + PlexHits[current].dG1 + PlexHits[current].dG2;
282 if(PlexHits[current].energy < mfe_pk) mfe_pk = PlexHits[current].energy;
283 free(PlexHits[current].structure);
284 PlexHits[current].structure = strdup(constraint);
285 PlexHits[current].processed = 1;
287 PlexHits[current].inactive = 1;
290 PlexHits[current].energy = mfe;
291 if(mfe > mfe_pk + subopts) PlexHits[current].inactive = 1;
294 now go through the rest of the PlexHits array and mark all hits as inactive if they can
295 definetely not be within the results set according to current subopt settings
297 for(i = 0; i < NumberOfHits; i++){
298 if(!PlexHits[i].inactive){
299 if(PlexHits[i].structure){
300 if(!PlexHits[i].processed){
301 double cost = PlexHits[i].dG1;
302 if(PlexHits[i].dG2 > cost) cost = PlexHits[i].dG2;
303 cost += mfe + PlexHits[i].ddG + pk_penalty;
304 if(cost > mfe + subopts)
305 PlexHits[i].inactive = 1;
307 if(PlexHits[i].energy > mfe_pk + subopts)
308 PlexHits[i].inactive = 1;
311 if(mfe > mfe_pk + subopts)
312 PlexHits[i].inactive = 1;
321 /* now sort the actual results again according to their energy */
323 qsort(PlexHits, NumberOfHits, sizeof(dupVar), PlexHit_cmp_energy);
325 /* and print the results to stdout */
326 for(i = 0; i < NumberOfHits; i++){
327 if(!PlexHits[i].inactive){
328 if(PlexHits[i].structure)
329 printf("%s (%6.2f)\n", PlexHits[i].structure, PlexHits[i].energy);
331 printf("%s (%6.2f)\n", mfe_struct, mfe);
336 ########################################################
337 # Generate Plot for the best structure
338 ########################################################
341 annotation = (char *) space(sizeof(char)*300);
342 temp = (char *) space(sizeof(char)*300);
344 /* and print the results to stdout */
345 for(i = 0; i < NumberOfHits; i++){
346 if(!PlexHits[i].inactive){
347 if (PlexHits[i].structure){
348 annotation = (char *) space(sizeof(char)*300);
349 temp = (char *) space(sizeof(char)*300);
350 sprintf(temp, "%d %d 13 1 0 0 omark\n", (int) PlexHits[i].tb, PlexHits[i].te);
351 strcat(annotation, temp);
352 sprintf(temp, "%d %d 13 1 0 0 omark\n", (int) PlexHits[i].qb, PlexHits[i].qe);
353 strcat(annotation, temp);
354 sprintf(temp, "0 0 2 setrgbcolor\n2 setlinewidth\n%d cmark\n%d cmark\n1 setlinewidth", PlexHits[i].tb, PlexHits[i].qe);
355 strcat(annotation, temp);
356 PS_rna_plot_a(s1, PlexHits[i].structure, fname, annotation, "");
360 PS_rna_plot(s1, mfe_struct, fname);
369 for(i=0;i<NumberOfHits;i++) {
370 printf("%d\t%s %3d,%-3d : %3d,%-3d (%5.2f = %5.2f + %5.2f + %5.2f)\n", PlexHits[i].inactive, PlexHits[i].structure, PlexHits[i].tb, PlexHits[i].te, PlexHits[i].qb, PlexHits[i].qe, PlexHits[i].ddG, PlexHits[i].energy, PlexHits[i].dG1, PlexHits[i].dG2);
376 while((PlexHits[0].ddG+subopts>=PlexHits[current+1].ddG) && (current+1<NumberOfHits)) {
380 ########################################################
381 # Constrained RNAfold to reevaluate the actual energy
382 ########################################################
384 constraint = (char *) space(length+1);
385 for(i=0; i<length; i++) {
386 if((PlexHits[current].tb-1<=i) && (PlexHits[current].te-1>=i)) {
388 } else if((PlexHits[current].qb-1<=i) && (PlexHits[current].qe-1>=i)) {
394 constraint[length]='\0';
395 if (verbose) printf("Constrained structure:\n%s\n%s\n", orig_s1, constraint);
398 constrainedEnergy=fold(s1, constraint);
399 if (verbose) printf("%s %f\n", constraint, constrainedEnergy);
402 ########################################################
404 ########################################################
406 if (PlexHits[current].structure) {
407 for(i=PlexHits[current].tb-1; i<=PlexHits[current].te-1; i++) {
408 if(PlexHits[current].structure[i-PlexHits[current].tb+1]=='(') {
412 for(i=PlexHits[current].qb-1; i<=PlexHits[current].qe-1; i++) {
413 if(PlexHits[current].structure[i-PlexHits[current].qb+1+1+1+PlexHits[current].te-PlexHits[current].tb]==')'){
419 if (PlexHits[current].structure) {
420 printf("%s (%3.2f)\n", constraint, constrainedEnergy+PlexHits[current].ddG-(float) pk_penalty);
422 printf("%s (%3.2f)\n", constraint, constrainedEnergy+PlexHits[current].ddG);
427 ########################################################
428 # Generate Visualization
429 ########################################################
432 annotation = (char *) space(sizeof(char)*300);
433 temp = (char *) space(sizeof(char)*300);
435 if (PlexHits[current].te) {
439 for (i=1; PlexHits[current].structure[i]!=')'; i++) {
440 if ((stem) && (PlexHits[current].structure[i]!='(')) {
443 sprintf(temp, "%d %d 13 1 0 0 omark\n", (int) PlexHits[current].tb+start, PlexHits[current].tb+end);
444 strcat(annotation, temp);
446 if ((!stem) && (PlexHits[current].structure[i]=='(')) {
453 for (i; i<=strlen(PlexHits[current].structure); i++) {
454 if ((stem) && (PlexHits[current].structure[i]!=')')) {
457 sprintf(temp, "%d %d 13 1 0 0 omark\n", PlexHits[current].qb+start-PlexHits[current].te+PlexHits[current].tb-2, PlexHits[current].qb+end-PlexHits[current].te+PlexHits[current].tb-2);
458 strcat(annotation, temp);
460 if ((!stem) && (PlexHits[current].structure[i]==')')) {
466 sprintf(temp, "0 0 2 setrgbcolor\n2 setlinewidth\n%d cmark\n%d cmark\n1 setlinewidth", PlexHits[current].tb, PlexHits[current].qe);
467 strcat(annotation, temp);
468 PS_rna_plot_a(s1, constraint, fname, annotation, "");
472 PS_rna_plot(s1, constraint, fname);
479 ########################################################
481 ########################################################
486 (void) fflush(stdout);
500 s1 = id_s1 = orig_s1 = NULL;
502 /* print user help for the next round if we get input from tty */
503 if(istty) print_tty_input_seq();
508 int PlexHit_cmp (const void *c1, const void *c2) {
509 dupVar *p1=(dupVar *)c1;
510 dupVar *p2=(dupVar *)c2;
511 return (p1->ddG >= p2->ddG);
514 int PlexHit_cmp_energy (const void *c1, const void *c2) {
515 dupVar *p1=(dupVar *)c1;
516 dupVar *p2=(dupVar *)c2;
517 if(p1->energy > p2->energy) return 1;
518 else if(p1->energy < p2->energy) return -1;